From 4cf37bb7d9dc6dfb5d5fca7f735ba65ba173dabc Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:22 +0900 Subject: x86, swiotlb: add map_page and unmap_page This is a preparation of struct dma_mapping_ops unification. We use map_page and unmap_page instead of map_single and unmap_single. This is sorta temporary workaround. We will move them to lib/swiotlb.c to enable x86's swiotlb code to directly use them. We will remove map_single and unmap_single hooks in the last patch in this patchset. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb_64.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index d59c91747665..d1c0366886dd 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -45,6 +45,23 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } +/* these will be moved to lib/swiotlb.c later on */ + +static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return swiotlb_map_single(dev, page_address(page) + offset, size, dir); +} + +static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + swiotlb_unmap_single(dev, dma_handle, size, dir); +} + static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { @@ -71,6 +88,8 @@ struct dma_mapping_ops swiotlb_dma_ops = { .sync_sg_for_device = swiotlb_sync_sg_for_device, .map_sg = swiotlb_map_sg, .unmap_sg = swiotlb_unmap_sg, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, .dma_supported = NULL, }; -- cgit v1.2.3 From 052aedbfb6cd4fd1a73010735da88a9dcc224673 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:23 +0900 Subject: x86, gart: add map_page and unmap_page This is a preparation of struct dma_mapping_ops unification. We use map_page and unmap_page instead of map_single and unmap_single. We will remove map_single and unmap_single hooks in the last patch in this patchset. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 00c2bcd41463..e49c6dd0e8c6 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -255,10 +255,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, } /* Map a single area into the IOMMU */ -static dma_addr_t -gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) +static dma_addr_t gart_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long bus; + phys_addr_t paddr = page_to_phys(page) + offset; if (!dev) dev = &x86_dma_fallback_dev; @@ -272,11 +275,19 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) return bus; } +static dma_addr_t gart_map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir) +{ + return gart_map_page(dev, pfn_to_page(paddr >> PAGE_SHIFT), + paddr & ~PAGE_MASK, size, dir, NULL); +} + /* * Free a DMA mapping. */ -static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) +static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long iommu_page; int npages; @@ -295,6 +306,12 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, free_iommu(iommu_page, npages); } +static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, int direction) +{ + gart_unmap_page(dev, dma_addr, size, direction, NULL); +} + /* * Wrapper for pci_unmap_single working with scatterlists. */ @@ -712,6 +729,8 @@ static struct dma_mapping_ops gart_dma_ops = { .unmap_single = gart_unmap_single, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, + .map_page = gart_map_page, + .unmap_page = gart_unmap_page, .alloc_coherent = gart_alloc_coherent, .free_coherent = gart_free_coherent, }; -- cgit v1.2.3 From 3991605c40b1059d0204d9fddfcf878429ab8948 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:24 +0900 Subject: x86, calgary: add map_page and unmap_page This is a preparation of struct dma_mapping_ops unification. We use map_page and unmap_page instead of map_single and unmap_single. We will remove map_single and unmap_single hooks in the last patch in this patchset. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index d28bbdc35e4e..e33cfcf1af5b 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -445,10 +445,12 @@ error: return 0; } -static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, - size_t size, int direction) +static dma_addr_t calgary_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { - void *vaddr = phys_to_virt(paddr); + void *vaddr = page_address(page) + offset; unsigned long uaddr; unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); @@ -456,17 +458,32 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, uaddr = (unsigned long)vaddr; npages = iommu_num_pages(uaddr, size, PAGE_SIZE); - return iommu_alloc(dev, tbl, vaddr, npages, direction); + return iommu_alloc(dev, tbl, vaddr, npages, dir); } -static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) +static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, + size_t size, int direction) +{ + return calgary_map_page(dev, pfn_to_page(paddr >> PAGE_SHIFT), + paddr & ~PAGE_MASK, size, + direction, NULL); +} + +static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; - npages = iommu_num_pages(dma_handle, size, PAGE_SIZE); - iommu_free(tbl, dma_handle, npages); + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + iommu_free(tbl, dma_addr, npages); +} + +static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + calgary_unmap_page(dev, dma_handle, size, direction, NULL); } static void* calgary_alloc_coherent(struct device *dev, size_t size, @@ -522,6 +539,8 @@ static struct dma_mapping_ops calgary_dma_ops = { .unmap_single = calgary_unmap_single, .map_sg = calgary_map_sg, .unmap_sg = calgary_unmap_sg, + .map_page = calgary_map_page, + .unmap_page = calgary_unmap_page, }; static inline void __iomem * busno_to_bbar(unsigned char num) -- cgit v1.2.3 From 51491367c2541c51a9d435eec88b0e846223fb59 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:25 +0900 Subject: x86, AMD IOMMU: add map_page and unmap_page This is a preparation of struct dma_mapping_ops unification. We use map_page and unmap_page instead of map_single and unmap_single. We will remove map_single and unmap_single hooks in the last patch in this patchset. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5113c080f0c4..85704418644a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #ifdef CONFIG_IOMMU_API #include @@ -1297,8 +1298,10 @@ static void __unmap_single(struct amd_iommu *iommu, /* * The exported map_single function for dma_ops. */ -static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, - size_t size, int dir) +static dma_addr_t map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long flags; struct amd_iommu *iommu; @@ -1306,6 +1309,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, u16 devid; dma_addr_t addr; u64 dma_mask; + phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); @@ -1337,11 +1341,18 @@ out: return addr; } +static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir) +{ + return map_page(dev, pfn_to_page(paddr >> PAGE_SHIFT), + paddr & ~PAGE_MASK, size, dir, NULL); +} + /* * The exported unmap_single function for dma_ops. */ -static void unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int dir) +static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct amd_iommu *iommu; @@ -1367,6 +1378,12 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, spin_unlock_irqrestore(&domain->lock, flags); } +static void unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, int dir) +{ + return unmap_page(dev, dma_addr, size, dir, NULL); +} + /* * This is a special map_sg function which is used if we should map a * device which is not handled by an AMD IOMMU in the system. @@ -1649,6 +1666,8 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { .free_coherent = free_coherent, .map_single = map_single, .unmap_single = unmap_single, + .map_page = map_page, + .unmap_page = unmap_page, .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, -- cgit v1.2.3 From 33feffd4525fc2e4dd0a322fb5d07d61f85d791e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:27 +0900 Subject: x86, pci-nommu: add map_page This is a preparation of struct dma_mapping_ops unification. We use map_page and unmap_page instead of map_single and unmap_single. We will remove map_single hook in the last patch in this patchset. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c70ab5a5d4c8..5a73a824ac1c 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -25,18 +25,25 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) return 1; } -static dma_addr_t -nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, - int direction) +static dma_addr_t nommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { - dma_addr_t bus = paddr; + dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); - if (!check_addr("map_single", hwdev, bus, size)) - return bad_dma_address; + if (!check_addr("map_single", dev, bus, size)) + return bad_dma_address; flush_write_buffers(); return bus; } +static dma_addr_t nommu_map_single(struct device *hwdev, phys_addr_t paddr, + size_t size, int direction) +{ + return nommu_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT), + paddr & ~PAGE_MASK, size, direction, NULL); +} /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scatter-gather version of the @@ -83,6 +90,7 @@ struct dma_mapping_ops nommu_dma_ops = { .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, + .map_page = nommu_map_page, .is_phys = 1, }; -- cgit v1.2.3 From d7dff84053524186b139342ac66a4160ce6bb517 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:47:28 +0900 Subject: x86: remove map_single and unmap_single in struct dma_mapping_ops This patch converts dma_map_single and dma_unmap_single to use map_page and unmap_page respectively and removes unnecessary map_single and unmap_single in struct dma_mapping_ops. This leaves intel-iommu's dma_map_single and dma_unmap_single since IA64 uses them. They will be removed after the unification. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 15 ++++++--------- arch/x86/kernel/amd_iommu.c | 15 --------------- arch/x86/kernel/pci-calgary_64.c | 16 ---------------- arch/x86/kernel/pci-gart_64.c | 19 ++----------------- arch/x86/kernel/pci-nommu.c | 8 -------- arch/x86/kernel/pci-swiotlb_64.c | 9 --------- drivers/pci/intel-iommu.c | 2 -- 7 files changed, 8 insertions(+), 76 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 3fe05046fb31..b81f82268a16 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -24,10 +24,6 @@ struct dma_mapping_ops { dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); - dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, - size_t size, int direction); - void (*unmap_single)(struct device *dev, dma_addr_t addr, - size_t size, int direction); void (*sync_single_for_cpu)(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction); @@ -103,7 +99,9 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size, struct dma_mapping_ops *ops = get_dma_ops(hwdev); BUG_ON(!valid_dma_direction(direction)); - return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + return ops->map_page(hwdev, virt_to_page(ptr), + (unsigned long)ptr & ~PAGE_MASK, size, + direction, NULL); } static inline void @@ -113,8 +111,8 @@ dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, struct dma_mapping_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(direction)); - if (ops->unmap_single) - ops->unmap_single(dev, addr, size, direction); + if (ops->unmap_page) + ops->unmap_page(dev, addr, size, direction, NULL); } static inline int @@ -221,8 +219,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, struct dma_mapping_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(direction)); - return ops->map_single(dev, page_to_phys(page) + offset, - size, direction); + return ops->map_page(dev, page, offset, size, direction, NULL); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 85704418644a..a5dedb690a9a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1341,13 +1341,6 @@ out: return addr; } -static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, - size_t size, int dir) -{ - return map_page(dev, pfn_to_page(paddr >> PAGE_SHIFT), - paddr & ~PAGE_MASK, size, dir, NULL); -} - /* * The exported unmap_single function for dma_ops. */ @@ -1378,12 +1371,6 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, spin_unlock_irqrestore(&domain->lock, flags); } -static void unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int dir) -{ - return unmap_page(dev, dma_addr, size, dir, NULL); -} - /* * This is a special map_sg function which is used if we should map a * device which is not handled by an AMD IOMMU in the system. @@ -1664,8 +1651,6 @@ static void prealloc_protection_domains(void) static struct dma_mapping_ops amd_iommu_dma_ops = { .alloc_coherent = alloc_coherent, .free_coherent = free_coherent, - .map_single = map_single, - .unmap_single = unmap_single, .map_page = map_page, .unmap_page = unmap_page, .map_sg = map_sg, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e33cfcf1af5b..756138b604e1 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -461,14 +461,6 @@ static dma_addr_t calgary_map_page(struct device *dev, struct page *page, return iommu_alloc(dev, tbl, vaddr, npages, dir); } -static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, - size_t size, int direction) -{ - return calgary_map_page(dev, pfn_to_page(paddr >> PAGE_SHIFT), - paddr & ~PAGE_MASK, size, - direction, NULL); -} - static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) @@ -480,12 +472,6 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, iommu_free(tbl, dma_addr, npages); } -static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) -{ - calgary_unmap_page(dev, dma_handle, size, direction, NULL); -} - static void* calgary_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { @@ -535,8 +521,6 @@ static void calgary_free_coherent(struct device *dev, size_t size, static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .free_coherent = calgary_free_coherent, - .map_single = calgary_map_single, - .unmap_single = calgary_unmap_single, .map_sg = calgary_map_sg, .unmap_sg = calgary_unmap_sg, .map_page = calgary_map_page, diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e49c6dd0e8c6..9c557c0c928c 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -275,13 +275,6 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page, return bus; } -static dma_addr_t gart_map_single(struct device *dev, phys_addr_t paddr, - size_t size, int dir) -{ - return gart_map_page(dev, pfn_to_page(paddr >> PAGE_SHIFT), - paddr & ~PAGE_MASK, size, dir, NULL); -} - /* * Free a DMA mapping. */ @@ -306,12 +299,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, free_iommu(iommu_page, npages); } -static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - gart_unmap_page(dev, dma_addr, size, direction, NULL); -} - /* * Wrapper for pci_unmap_single working with scatterlists. */ @@ -324,7 +311,7 @@ gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) for_each_sg(sg, s, nents, i) { if (!s->dma_length || !s->length) break; - gart_unmap_single(dev, s->dma_address, s->dma_length, dir); + gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL); } } @@ -538,7 +525,7 @@ static void gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr) { - gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); + gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, get_order(size)); } @@ -725,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) } static struct dma_mapping_ops gart_dma_ops = { - .map_single = gart_map_single, - .unmap_single = gart_unmap_single, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 5a73a824ac1c..d42b69c90b40 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -38,13 +38,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, return bus; } -static dma_addr_t nommu_map_single(struct device *hwdev, phys_addr_t paddr, - size_t size, int direction) -{ - return nommu_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT), - paddr & ~PAGE_MASK, size, direction, NULL); -} - /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scatter-gather version of the * above pci_map_single interface. Here the scatter gather list @@ -88,7 +81,6 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, struct dma_mapping_ops nommu_dma_ops = { .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, - .map_single = nommu_map_single, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .is_phys = 1, diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index d1c0366886dd..3ae354c0fdef 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -38,13 +38,6 @@ int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) return 0; } -static dma_addr_t -swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, - int direction) -{ - return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); -} - /* these will be moved to lib/swiotlb.c later on */ static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, @@ -78,8 +71,6 @@ struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single_phys, - .unmap_single = swiotlb_unmap_single, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 60258ecbcb4c..da273e4ef66c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2582,8 +2582,6 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, static struct dma_mapping_ops intel_dma_ops = { .alloc_coherent = intel_alloc_coherent, .free_coherent = intel_free_coherent, - .map_single = intel_map_single, - .unmap_single = intel_unmap_single, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 160c1d8e40866edfeae7d68816b7005d70acf391 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:59:02 +0900 Subject: x86, ia64: convert to use generic dma_map_ops struct This converts X86 and IA64 to use include/linux/dma-mapping.h. It's a bit large but pretty boring. The major change for X86 is converting 'int dir' to 'enum dma_data_direction dir' in DMA mapping operations. The major changes for IA64 is using map_page and unmap_page instead of map_single and unmap_single. Signed-off-by: FUJITA Tomonori Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- arch/ia64/dig/Makefile | 4 +- arch/ia64/dig/dig_vtd_iommu.c | 77 ------------------- arch/ia64/hp/common/hwsw_iommu.c | 6 +- arch/ia64/hp/common/sba_iommu.c | 46 ++++++++---- arch/ia64/include/asm/dma-mapping.h | 107 ++++++++------------------ arch/ia64/include/asm/machvec.h | 12 +-- arch/ia64/kernel/dma-mapping.c | 4 +- arch/ia64/kernel/machvec.c | 8 +- arch/ia64/kernel/pci-dma.c | 49 +++++++----- arch/ia64/kernel/pci-swiotlb.c | 32 +++++--- arch/ia64/sn/pci/pci_dma.c | 58 +++++++------- arch/x86/include/asm/device.h | 2 +- arch/x86/include/asm/dma-mapping.h | 146 +++++++++++++----------------------- arch/x86/include/asm/iommu.h | 2 +- arch/x86/kernel/amd_iommu.c | 8 +- arch/x86/kernel/pci-calgary_64.c | 15 ++-- arch/x86/kernel/pci-dma.c | 4 +- arch/x86/kernel/pci-gart_64.c | 14 ++-- arch/x86/kernel/pci-nommu.c | 5 +- arch/x86/kernel/pci-swiotlb_64.c | 6 +- drivers/pci/intel-iommu.c | 9 +-- include/linux/intel-iommu.h | 6 +- include/linux/swiotlb.h | 18 +++-- lib/swiotlb.c | 18 +++-- 24 files changed, 278 insertions(+), 378 deletions(-) delete mode 100644 arch/ia64/dig/dig_vtd_iommu.c (limited to 'arch/x86/kernel') diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile index 5c0283830bd6..2f7caddf093e 100644 --- a/arch/ia64/dig/Makefile +++ b/arch/ia64/dig/Makefile @@ -7,8 +7,8 @@ obj-y := setup.o ifeq ($(CONFIG_DMAR), y) -obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o dig_vtd_iommu.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o else obj-$(CONFIG_IA64_GENERIC) += machvec.o endif -obj-$(CONFIG_IA64_DIG_VTD) += dig_vtd_iommu.o + diff --git a/arch/ia64/dig/dig_vtd_iommu.c b/arch/ia64/dig/dig_vtd_iommu.c deleted file mode 100644 index fdb8ba9f4992..000000000000 --- a/arch/ia64/dig/dig_vtd_iommu.c +++ /dev/null @@ -1,77 +0,0 @@ -#include -#include -#include -#include -#include - -void * -vtd_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flags) -{ - return intel_alloc_coherent(dev, size, dma_handle, flags); -} -EXPORT_SYMBOL_GPL(vtd_alloc_coherent); - -void -vtd_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - intel_free_coherent(dev, size, vaddr, dma_handle); -} -EXPORT_SYMBOL_GPL(vtd_free_coherent); - -dma_addr_t -vtd_map_single_attrs(struct device *dev, void *addr, size_t size, - int dir, struct dma_attrs *attrs) -{ - return intel_map_single(dev, (phys_addr_t)addr, size, dir); -} -EXPORT_SYMBOL_GPL(vtd_map_single_attrs); - -void -vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, - int dir, struct dma_attrs *attrs) -{ - intel_unmap_single(dev, iova, size, dir); -} -EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs); - -int -vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, - int dir, struct dma_attrs *attrs) -{ - return intel_map_sg(dev, sglist, nents, dir); -} -EXPORT_SYMBOL_GPL(vtd_map_sg_attrs); - -void -vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, int dir, struct dma_attrs *attrs) -{ - intel_unmap_sg(dev, sglist, nents, dir); -} -EXPORT_SYMBOL_GPL(vtd_unmap_sg_attrs); - -int -vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} -EXPORT_SYMBOL_GPL(vtd_dma_mapping_error); - -extern int iommu_dma_supported(struct device *dev, u64 mask); - -struct dma_mapping_ops vtd_dma_ops = { - .alloc_coherent = vtd_alloc_coherent, - .free_coherent = vtd_free_coherent, - .map_single_attrs = vtd_map_single_attrs, - .unmap_single_attrs = vtd_unmap_single_attrs, - .map_sg_attrs = vtd_map_sg_attrs, - .unmap_sg_attrs = vtd_unmap_sg_attrs, - .sync_single_for_cpu = machvec_dma_sync_single, - .sync_sg_for_cpu = machvec_dma_sync_sg, - .sync_single_for_device = machvec_dma_sync_single, - .sync_sg_for_device = machvec_dma_sync_sg, - .dma_supported_op = iommu_dma_supported, - .mapping_error = vtd_dma_mapping_error, -}; diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index e5bbeba77810..e4a80d82e3d8 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -17,7 +17,7 @@ #include #include -extern struct dma_mapping_ops sba_dma_ops, swiotlb_dma_ops; +extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; /* swiotlb declarations & definitions: */ extern int swiotlb_late_init_with_default_size (size_t size); @@ -30,10 +30,10 @@ extern int swiotlb_late_init_with_default_size (size_t size); static inline int use_swiotlb(struct device *dev) { return dev && dev->dma_mask && - !sba_dma_ops.dma_supported_op(dev, *dev->dma_mask); + !sba_dma_ops.dma_supported(dev, *dev->dma_mask); } -struct dma_mapping_ops *hwsw_dma_get_ops(struct device *dev) +struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) { if (use_swiotlb(dev)) return &swiotlb_dma_ops; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 29e7206f3dc6..129b62eb39e5 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -909,11 +909,13 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) * * See Documentation/DMA-mapping.txt */ -static dma_addr_t -sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, - struct dma_attrs *attrs) +static dma_addr_t sba_map_page(struct device *dev, struct page *page, + unsigned long poff, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { struct ioc *ioc; + void *addr = page_address(page) + poff; dma_addr_t iovp; dma_addr_t offset; u64 *pdir_start; @@ -992,6 +994,14 @@ sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, return SBA_IOVA(ioc, iovp, offset); } +static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return sba_map_page(dev, virt_to_page(addr), + (unsigned long)addr & ~PAGE_MASK, size, dir, attrs); +} + #ifdef ENABLE_MARK_CLEAN static SBA_INLINE void sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) @@ -1026,8 +1036,8 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) * * See Documentation/DMA-mapping.txt */ -static void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, - int dir, struct dma_attrs *attrs) +static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct ioc *ioc; #if DELAYED_RESOURCE_CNT > 0 @@ -1095,6 +1105,12 @@ static void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t s #endif /* DELAYED_RESOURCE_CNT == 0 */ } +void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + sba_unmap_page(dev, iova, size, dir, attrs); +} + /** * sba_alloc_coherent - allocate/map shared mem for DMA * @dev: instance of PCI owned by the driver that's asking. @@ -1423,7 +1439,8 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, * See Documentation/DMA-mapping.txt */ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, int dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -1514,7 +1531,8 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, * See Documentation/DMA-mapping.txt */ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, int dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { #ifdef ASSERT_PDIR_SANITY struct ioc *ioc; @@ -2062,7 +2080,7 @@ static struct acpi_driver acpi_sba_ioc_driver = { }, }; -extern struct dma_mapping_ops swiotlb_dma_ops; +extern struct dma_map_ops swiotlb_dma_ops; static int __init sba_init(void) @@ -2176,18 +2194,18 @@ sba_page_override(char *str) __setup("sbapagesize=",sba_page_override); -struct dma_mapping_ops sba_dma_ops = { +struct dma_map_ops sba_dma_ops = { .alloc_coherent = sba_alloc_coherent, .free_coherent = sba_free_coherent, - .map_single_attrs = sba_map_single_attrs, - .unmap_single_attrs = sba_unmap_single_attrs, - .map_sg_attrs = sba_map_sg_attrs, - .unmap_sg_attrs = sba_unmap_sg_attrs, + .map_page = sba_map_page, + .unmap_page = sba_unmap_page, + .map_sg = sba_map_sg_attrs, + .unmap_sg = sba_unmap_sg_attrs, .sync_single_for_cpu = machvec_dma_sync_single, .sync_sg_for_cpu = machvec_dma_sync_sg, .sync_single_for_device = machvec_dma_sync_single, .sync_sg_for_device = machvec_dma_sync_sg, - .dma_supported_op = sba_dma_supported, + .dma_supported = sba_dma_supported, .mapping_error = sba_dma_mapping_error, }; diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index bac3159379f7..d6230f514536 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -9,73 +9,21 @@ #include #include -struct dma_mapping_ops { - int (*mapping_error)(struct device *dev, - dma_addr_t dma_addr); - void* (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); - void (*free_coherent)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr, - size_t size, int direction); - void (*unmap_single)(struct device *dev, dma_addr_t addr, - size_t size, int direction); - dma_addr_t (*map_single_attrs)(struct device *dev, void *cpu_addr, - size_t size, int direction, - struct dma_attrs *attrs); - void (*unmap_single_attrs)(struct device *dev, - dma_addr_t dma_addr, - size_t size, int direction, - struct dma_attrs *attrs); - void (*sync_single_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_for_device)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_range_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_single_range_for_device)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_sg_for_cpu)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - void (*sync_sg_for_device)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - int (*map_sg)(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); - void (*unmap_sg)(struct device *hwdev, - struct scatterlist *sg, int nents, - int direction); - int (*map_sg_attrs)(struct device *dev, - struct scatterlist *sg, int nents, - int direction, struct dma_attrs *attrs); - void (*unmap_sg_attrs)(struct device *dev, - struct scatterlist *sg, int nents, - int direction, - struct dma_attrs *attrs); - int (*dma_supported_op)(struct device *hwdev, u64 mask); - int is_phys; -}; - -extern struct dma_mapping_ops *dma_ops; +extern struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *daddr, gfp_t gfp) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); return ops->alloc_coherent(dev, size, daddr, gfp | GFP_DMA); } static inline void dma_free_coherent(struct device *dev, size_t size, void *caddr, dma_addr_t daddr) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->free_coherent(dev, size, caddr, daddr); } @@ -87,8 +35,10 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - return ops->map_single_attrs(dev, caddr, size, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->map_page(dev, virt_to_page(caddr), + (unsigned long)caddr & ~PAGE_MASK, size, + dir, attrs); } static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t daddr, @@ -96,8 +46,8 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t daddr, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - ops->unmap_single_attrs(dev, daddr, size, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + ops->unmap_page(dev, daddr, size, dir, attrs); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) @@ -107,8 +57,8 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - return ops->map_sg_attrs(dev, sgl, nents, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->map_sg(dev, sgl, nents, dir, attrs); } static inline void dma_unmap_sg_attrs(struct device *dev, @@ -116,8 +66,8 @@ static inline void dma_unmap_sg_attrs(struct device *dev, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - ops->unmap_sg_attrs(dev, sgl, nents, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + ops->unmap_sg(dev, sgl, nents, dir, attrs); } #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) @@ -127,7 +77,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t daddr, size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_single_for_cpu(dev, daddr, size, dir); } @@ -135,7 +85,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_sg_for_cpu(dev, sgl, nents, dir); } @@ -144,7 +94,7 @@ static inline void dma_sync_single_for_device(struct device *dev, size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_single_for_device(dev, daddr, size, dir); } @@ -153,20 +103,29 @@ static inline void dma_sync_sg_for_device(struct device *dev, int nents, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_sg_for_device(dev, sgl, nents, dir); } static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); return ops->mapping_error(dev, daddr); } -#define dma_map_page(dev, pg, off, size, dir) \ - dma_map_single(dev, page_address(pg) + (off), (size), (dir)) -#define dma_unmap_page(dev, dma_addr, size, dir) \ - dma_unmap_single(dev, dma_addr, size, dir) +static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->map_page(dev, page, offset, size, dir, NULL); +} + +static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + dma_unmap_single(dev, addr, size, dir); +} /* * Rest of this file is part of the "Advanced DMA API". Use at your own risk. @@ -180,8 +139,8 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) static inline int dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - return ops->dma_supported_op(dev, mask); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->dma_supported(dev, mask); } static inline int diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 95e1708fa4e3..e8442c7e4cc8 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -11,7 +11,6 @@ #define _ASM_IA64_MACHVEC_H #include -#include /* forward declarations: */ struct device; @@ -24,6 +23,7 @@ struct task_struct; struct pci_dev; struct msi_desc; struct dma_attrs; +enum dma_data_direction; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_cpu_init_t (void); @@ -45,7 +45,7 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); -typedef struct dma_mapping_ops *ia64_mv_dma_get_ops(struct device *); +typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* * WARNING: The legacy I/O space is _architected_. Platforms are @@ -97,8 +97,10 @@ machvec_noop_bus (struct pci_bus *bus) extern void machvec_setup (char **); extern void machvec_timer_interrupt (int, void *); -extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); -extern void machvec_dma_sync_sg (struct device *, struct scatterlist *, int, int); +extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t, + enum dma_data_direction); +extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction); extern void machvec_tlb_migrate_finish (struct mm_struct *); # if defined (CONFIG_IA64_HP_SIM) @@ -250,7 +252,7 @@ extern void machvec_init_from_cmdline(const char *cmdline); # endif /* CONFIG_IA64_GENERIC */ extern void swiotlb_dma_init(void); -extern struct dma_mapping_ops *dma_get_ops(struct device *); +extern struct dma_map_ops *dma_get_ops(struct device *); /* * Define default versions so we can extend machvec for new platforms without having diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 427f69617226..7060e13fa421 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,9 +1,9 @@ #include -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -struct dma_mapping_ops *dma_get_ops(struct device *dev) +struct dma_map_ops *dma_get_ops(struct device *dev) { return dma_ops; } diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 7ccb228ceedc..d41a40ef80c0 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -1,5 +1,5 @@ #include - +#include #include #include @@ -75,14 +75,16 @@ machvec_timer_interrupt (int irq, void *dev_id) EXPORT_SYMBOL(machvec_timer_interrupt); void -machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir) +machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) { mb(); } EXPORT_SYMBOL(machvec_dma_sync_single); void -machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir) +machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n, + enum dma_data_direction dir) { mb(); } diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index 640669eba5d4..b30209ec8c6e 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -41,21 +41,7 @@ struct device fallback_dev = { .dma_mask = &fallback_dev.coherent_dma_mask, }; -extern struct dma_mapping_ops vtd_dma_ops; - -void __init pci_iommu_alloc(void) -{ - dma_ops = &vtd_dma_ops; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} +extern struct dma_map_ops intel_dma_ops; static int __init pci_iommu_init(void) { @@ -81,10 +67,10 @@ iommu_dma_init(void) int iommu_dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); - if (ops->dma_supported_op) - return ops->dma_supported_op(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. @@ -113,4 +99,31 @@ int iommu_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(iommu_dma_supported); +static int vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +void __init pci_iommu_alloc(void) +{ + dma_ops = &intel_dma_ops; + + dma_ops->sync_single_for_cpu = machvec_dma_sync_single; + dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; + dma_ops->sync_single_for_device = machvec_dma_sync_single; + dma_ops->sync_sg_for_device = machvec_dma_sync_sg; + dma_ops->dma_supported = iommu_dma_supported; + dma_ops->mapping_error = vtd_dma_mapping_error; + + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + #endif diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 9f172c864377..6bf8f66786bd 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -16,24 +16,36 @@ EXPORT_SYMBOL(swiotlb); /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly; -struct dma_mapping_ops swiotlb_dma_ops = { +static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return swiotlb_map_single_attrs(dev, page_address(page) + offset, size, + dir, attrs); +} + +static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + swiotlb_unmap_single_attrs(dev, dma_handle, size, dir, attrs); +} + +struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, - .unmap_single = swiotlb_unmap_single, - .map_single_attrs = swiotlb_map_single_attrs, - .unmap_single_attrs = swiotlb_unmap_single_attrs, - .map_sg_attrs = swiotlb_map_sg_attrs, - .unmap_sg_attrs = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg, - .unmap_sg = swiotlb_unmap_sg, - .dma_supported_op = swiotlb_dma_supported, + .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index efdd69490009..9c788f9cedfd 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -171,10 +170,12 @@ static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr * TODO: simplify our interface; * figure out how to save dmamap handle so can use two step. */ -static dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr, - size_t size, int direction, - struct dma_attrs *attrs) +static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { + void *cpu_addr = page_address(page) + offset; dma_addr_t dma_addr; unsigned long phys_addr; struct pci_dev *pdev = to_pci_dev(dev); @@ -212,20 +213,20 @@ static dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr, * by @dma_handle into the coherence domain. On SN, we're always cache * coherent, so we just need to free any ATEs associated with this mapping. */ -static void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction, - struct dma_attrs *attrs) +static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); - provider->dma_unmap(pdev, dma_addr, direction); + provider->dma_unmap(pdev, dma_addr, dir); } /** - * sn_dma_unmap_sg_attrs - unmap a DMA scatterlist + * sn_dma_unmap_sg - unmap a DMA scatterlist * @dev: device to unmap * @sg: scatterlist to unmap * @nhwentries: number of scatterlist entries @@ -234,9 +235,9 @@ static void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, * * Unmap a set of streaming mode DMA translations. */ -static void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nhwentries, int direction, - struct dma_attrs *attrs) +static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(dev); @@ -246,14 +247,14 @@ static void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, BUG_ON(dev->bus != &pci_bus_type); for_each_sg(sgl, sg, nhwentries, i) { - provider->dma_unmap(pdev, sg->dma_address, direction); + provider->dma_unmap(pdev, sg->dma_address, dir); sg->dma_address = (dma_addr_t) NULL; sg->dma_length = 0; } } /** - * sn_dma_map_sg_attrs - map a scatterlist for DMA + * sn_dma_map_sg - map a scatterlist for DMA * @dev: device to map for * @sg: scatterlist to map * @nhwentries: number of entries @@ -267,8 +268,9 @@ static void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, * * Maps each entry of @sg for DMA. */ -static int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nhwentries, int direction, struct dma_attrs *attrs) +static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long phys_addr; struct scatterlist *saved_sg = sgl, *sg; @@ -305,8 +307,7 @@ static int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, * Free any successfully allocated entries. */ if (i > 0) - sn_dma_unmap_sg_attrs(dev, saved_sg, i, - direction, attrs); + sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs); return 0; } @@ -317,25 +318,26 @@ static int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, } static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, + enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } @@ -455,19 +457,19 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) return ret; } -static struct dma_mapping_ops sn_dma_ops = { +static struct dma_map_ops sn_dma_ops = { .alloc_coherent = sn_dma_alloc_coherent, .free_coherent = sn_dma_free_coherent, - .map_single_attrs = sn_dma_map_single_attrs, - .unmap_single_attrs = sn_dma_unmap_single_attrs, - .map_sg_attrs = sn_dma_map_sg_attrs, - .unmap_sg_attrs = sn_dma_unmap_sg_attrs, + .map_page = sn_dma_map_page, + .unmap_page = sn_dma_unmap_page, + .map_sg = sn_dma_map_sg, + .unmap_sg = sn_dma_unmap_sg, .sync_single_for_cpu = sn_dma_sync_single_for_cpu, .sync_sg_for_cpu = sn_dma_sync_sg_for_cpu, .sync_single_for_device = sn_dma_sync_single_for_device, .sync_sg_for_device = sn_dma_sync_sg_for_device, .mapping_error = sn_dma_mapping_error, - .dma_supported_op = sn_dma_supported, + .dma_supported = sn_dma_supported, }; void sn_dma_init(void) diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 3c034f48fdb0..4994a20acbcb 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,7 +6,7 @@ struct dev_archdata { void *acpi_handle; #endif #ifdef CONFIG_X86_64 -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops; #endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index b81f82268a16..5a347805a6c7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -17,50 +17,9 @@ extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; -struct dma_mapping_ops { - int (*mapping_error)(struct device *dev, - dma_addr_t dma_addr); - void* (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); - void (*free_coherent)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - void (*sync_single_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_for_device)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_range_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_single_range_for_device)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_sg_for_cpu)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - void (*sync_sg_for_device)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - int (*map_sg)(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); - void (*unmap_sg)(struct device *hwdev, - struct scatterlist *sg, int nents, - int direction); - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs); - void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs); - int (*dma_supported)(struct device *hwdev, u64 mask); - int is_phys; -}; - -extern struct dma_mapping_ops *dma_ops; - -static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) +extern struct dma_map_ops *dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) { #ifdef CONFIG_X86_32 return dma_ops; @@ -75,7 +34,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); @@ -94,138 +53,139 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); return ops->map_page(hwdev, virt_to_page(ptr), (unsigned long)ptr & ~PAGE_MASK, size, - direction, NULL); + dir, NULL); } static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) - ops->unmap_page(dev, addr, size, direction, NULL); + ops->unmap_page(dev, addr, size, dir, NULL); } static inline int dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) + int nents, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); - return ops->map_sg(hwdev, sg, nents, direction); + BUG_ON(!valid_dma_direction(dir)); + return ops->map_sg(hwdev, sg, nents, dir, NULL); } static inline void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_sg) - ops->unmap_sg(hwdev, sg, nents, direction); + ops->unmap_sg(hwdev, sg, nents, dir, NULL); } static inline void dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); + ops->sync_single_for_cpu(hwdev, dma_handle, size, dir); flush_write_buffers(); } static inline void dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_device) - ops->sync_single_for_device(hwdev, dma_handle, size, direction); + ops->sync_single_for_device(hwdev, dma_handle, size, dir); flush_write_buffers(); } static inline void dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - unsigned long offset, size_t size, int direction) + unsigned long offset, size_t size, + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_range_for_cpu) ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, direction); + size, dir); flush_write_buffers(); } static inline void dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_range_for_device) ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); + offset, size, dir); flush_write_buffers(); } static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_cpu) - ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + ops->sync_sg_for_cpu(hwdev, sg, nelems, dir); flush_write_buffers(); } static inline void dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_device) - ops->sync_sg_for_device(hwdev, sg, nelems, direction); + ops->sync_sg_for_device(hwdev, sg, nelems, dir); flush_write_buffers(); } static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!valid_dma_direction(direction)); - return ops->map_page(dev, page, offset, size, direction, NULL); + BUG_ON(!valid_dma_direction(dir)); + return ops->map_page(dev, page, offset, size, dir, NULL); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { - dma_unmap_single(dev, addr, size, direction); + dma_unmap_single(dev, addr, size, dir); } static inline void @@ -271,7 +231,7 @@ static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); void *memory; gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -297,7 +257,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, static inline void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); WARN_ON(irqs_disabled()); /* for portability */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index a6ee9e6f530f..af326a2975b5 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -3,7 +3,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); -extern struct dma_mapping_ops nommu_dma_ops; +extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a5dedb690a9a..008e522b9536 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1394,7 +1394,8 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, * lists). */ static int map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long flags; struct amd_iommu *iommu; @@ -1461,7 +1462,8 @@ unmap: * lists). */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long flags; struct amd_iommu *iommu; @@ -1648,7 +1650,7 @@ static void prealloc_protection_domains(void) } } -static struct dma_mapping_ops amd_iommu_dma_ops = { +static struct dma_map_ops amd_iommu_dma_ops = { .alloc_coherent = alloc_coherent, .free_coherent = free_coherent, .map_page = map_page, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 756138b604e1..755c21e906f3 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -380,8 +380,9 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) return tbl; } -static void calgary_unmap_sg(struct device *dev, - struct scatterlist *sglist, int nelems, int direction) +static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems,enum dma_data_direction dir, + struct dma_attrs *attrs) { struct iommu_table *tbl = find_iommu_table(dev); struct scatterlist *s; @@ -404,7 +405,8 @@ static void calgary_unmap_sg(struct device *dev, } static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct iommu_table *tbl = find_iommu_table(dev); struct scatterlist *s; @@ -429,15 +431,14 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, s->dma_address = (entry << PAGE_SHIFT) | s->offset; /* insert into HW table */ - tce_build(tbl, entry, npages, vaddr & PAGE_MASK, - direction); + tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir); s->dma_length = s->length; } return nelems; error: - calgary_unmap_sg(dev, sg, nelems, direction); + calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { sg->dma_address = bad_dma_address; sg->dma_length = 0; @@ -518,7 +519,7 @@ static void calgary_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } -static struct dma_mapping_ops calgary_dma_ops = { +static struct dma_map_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .free_coherent = calgary_free_coherent, .map_sg = calgary_map_sg, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 19a1044a0cd9..0d75c129b18a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -12,7 +12,7 @@ static int forbid_dac __read_mostly; -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -224,7 +224,7 @@ early_param("iommu", iommu_setup); int dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9c557c0c928c..8cb3e45439cf 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -302,8 +302,8 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, /* * Wrapper for pci_unmap_single working with scatterlists. */ -static void -gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *s; int i; @@ -333,7 +333,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) - gart_unmap_sg(dev, sg, i, dir); + gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; sg[0].dma_length = 0; break; @@ -404,8 +404,8 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, * DMA map all entries in a scatterlist. * Merge chunks that have page aligned sizes into a continuous mapping. */ -static int -gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *s, *ps, *start_sg, *sgmap; int need = 0, nextneed, i, out, start; @@ -472,7 +472,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) error: flush_gart(); - gart_unmap_sg(dev, sg, out, dir); + gart_unmap_sg(dev, sg, out, dir, NULL); /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { @@ -711,7 +711,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -static struct dma_mapping_ops gart_dma_ops = { +static struct dma_map_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index d42b69c90b40..fe50214db876 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -54,7 +54,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, * the same here. */ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct scatterlist *s; int i; @@ -78,7 +79,7 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } -struct dma_mapping_ops nommu_dma_ops = { +struct dma_map_ops nommu_dma_ops = { .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_sg = nommu_map_sg, diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3ae354c0fdef..3f0d9924dd1c 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -67,7 +67,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } -struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, @@ -77,8 +77,8 @@ struct dma_mapping_ops swiotlb_dma_ops = { .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg, - .unmap_sg = swiotlb_unmap_sg, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .dma_supported = NULL, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index da273e4ef66c..b9a562933903 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2441,7 +2441,8 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -2499,7 +2500,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, } int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, - int dir) + enum dma_data_direction dir, struct dma_attrs *attrs) { void *addr; int i; @@ -2579,15 +2580,13 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, return nelems; } -static struct dma_mapping_ops intel_dma_ops = { +struct dma_map_ops intel_dma_ops = { .alloc_coherent = intel_alloc_coherent, .free_coherent = intel_free_coherent, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, -#ifdef CONFIG_X86_64 .map_page = intel_map_page, .unmap_page = intel_unmap_page, -#endif }; static inline int iommu_domain_cache_init(void) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c4f6c101dbcd..a254db1decd0 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -334,7 +334,9 @@ extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int); -extern int intel_map_sg(struct device *, struct scatterlist *, int, int); -extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int); +extern int intel_map_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *); +extern void intel_unmap_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *); #endif diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index dedd3c0cfe30..0567c3d8633b 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -66,36 +66,38 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, extern int swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir, struct dma_attrs *attrs); + enum dma_data_direction dir, struct dma_attrs *attrs); extern void swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, struct dma_attrs *attrs); + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs); extern void swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); + size_t size, enum dma_data_direction dir); extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir); + int nelems, enum dma_data_direction dir); extern void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); + size_t size, enum dma_data_direction dir); extern void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir); + int nelems, enum dma_data_direction dir); extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir); + unsigned long offset, size_t size, + enum dma_data_direction dir); extern void swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, unsigned long offset, size_t size, - int dir); + enum dma_data_direction dir); extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 48deef7e1976..d047de990a3f 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -736,7 +736,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, void swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); } @@ -744,7 +744,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); } @@ -769,7 +769,8 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, void swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir) + unsigned long offset, size_t size, + enum dma_data_direction dir) { swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, SYNC_FOR_CPU); @@ -778,7 +779,8 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); void swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir) + unsigned long offset, size_t size, + enum dma_data_direction dir) { swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, SYNC_FOR_DEVICE); @@ -803,7 +805,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); */ int swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir, struct dma_attrs *attrs) + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -850,7 +852,7 @@ EXPORT_SYMBOL(swiotlb_map_sg); */ void swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, struct dma_attrs *attrs) + int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -902,7 +904,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, void swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) + int nelems, enum dma_data_direction dir) { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); } @@ -910,7 +912,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) + int nelems, enum dma_data_direction dir) { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } -- cgit v1.2.3 From f98eee8ea99fe74ee9c4e867ba178ec3072793be Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:59:03 +0900 Subject: x86, ia64: remove duplicated swiotlb code This adds swiotlb_map_page and swiotlb_unmap_page to lib/swiotlb.c and remove IA64 and X86's swiotlb_map_page and swiotlb_unmap_page. This also removes unnecessary swiotlb_map_single, swiotlb_map_single_attrs, swiotlb_unmap_single and swiotlb_unmap_single_attrs. Signed-off-by: FUJITA Tomonori Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- arch/ia64/kernel/pci-swiotlb.c | 16 -------------- arch/x86/kernel/pci-swiotlb_64.c | 17 -------------- include/linux/swiotlb.h | 21 ++++++------------ lib/swiotlb.c | 48 +++++++++++++++------------------------- 4 files changed, 25 insertions(+), 77 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 6bf8f66786bd..e6b2ec9b27da 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -16,22 +16,6 @@ EXPORT_SYMBOL(swiotlb); /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly; -static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - return swiotlb_map_single_attrs(dev, page_address(page) + offset, size, - dir, attrs); -} - -static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - swiotlb_unmap_single_attrs(dev, dma_handle, size, dir, attrs); -} - struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3f0d9924dd1c..5e32c4f6a7ba 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -38,23 +38,6 @@ int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) return 0; } -/* these will be moved to lib/swiotlb.c later on */ - -static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - return swiotlb_map_single(dev, page_address(page) + offset, size, dir); -} - -static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - swiotlb_unmap_single(dev, dma_handle, size, dir); -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 0567c3d8633b..493dc17e7c87 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -41,20 +41,13 @@ extern void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); -extern dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir); - -extern void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); - -extern dma_addr_t -swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, - int dir, struct dma_attrs *attrs); - -extern void -swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, struct dma_attrs *attrs); +extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs); +extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs); extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d047de990a3f..ec7922bd0d61 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -636,11 +636,14 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) * Once the device is given the dma address, the device owns this memory until * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. */ -dma_addr_t -swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, - int dir, struct dma_attrs *attrs) -{ - dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, ptr); +dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + void *ptr = page_address(page) + offset; + dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); void *map; BUG_ON(dir == DMA_NONE); @@ -649,37 +652,30 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(hwdev, dev_addr, size) && + if (!address_needs_mapping(dev, dev_addr, size) && !range_needs_mapping(ptr, size)) return dev_addr; /* * Oh well, have to allocate and map a bounce buffer. */ - map = map_single(hwdev, virt_to_phys(ptr), size, dir); + map = map_single(dev, phys, size, dir); if (!map) { - swiotlb_full(hwdev, size, dir, 1); + swiotlb_full(dev, size, dir, 1); map = io_tlb_overflow_buffer; } - dev_addr = swiotlb_virt_to_bus(hwdev, map); + dev_addr = swiotlb_virt_to_bus(dev, map); /* * Ensure that the address returned is DMA'ble */ - if (address_needs_mapping(hwdev, dev_addr, size)) + if (address_needs_mapping(dev, dev_addr, size)) panic("map_single: bounce buffer is not DMA'ble"); return dev_addr; } -EXPORT_SYMBOL(swiotlb_map_single_attrs); - -dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) -{ - return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL); -} -EXPORT_SYMBOL(swiotlb_map_single); +EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must @@ -689,9 +685,9 @@ EXPORT_SYMBOL(swiotlb_map_single); * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ -void -swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, struct dma_attrs *attrs) +void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { char *dma_addr = swiotlb_bus_to_virt(dev_addr); @@ -701,15 +697,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, else if (dir == DMA_FROM_DEVICE) dma_mark_clean(dma_addr, size); } -EXPORT_SYMBOL(swiotlb_unmap_single_attrs); - -void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, - int dir) -{ - return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL); -} -EXPORT_SYMBOL(swiotlb_unmap_single); +EXPORT_SYMBOL_GPL(swiotlb_unmap_page); /* * Make physical memory consistent for a single streaming mode DMA translation -- cgit v1.2.3 From 0b8698ab5847cbe25775083659f00c658a8161c9 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 9 Jan 2009 18:32:09 +0000 Subject: swiotlb: range_needs_mapping should take a physical address. The swiotlb_arch_range_needs_mapping() hook should take a physical address rather than a virtual address in order to support highmem pages. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb_64.c | 2 +- include/linux/swiotlb.h | 2 +- lib/swiotlb.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 5e32c4f6a7ba..34f12e9996ed 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -33,7 +33,7 @@ phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) return baddr; } -int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) +int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) { return 0; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 493dc17e7c87..ac9ff54f7cb3 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -31,7 +31,7 @@ extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); -extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); +extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 30fe65ede2bb..31bae40830ca 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -145,7 +145,7 @@ static void *swiotlb_bus_to_virt(dma_addr_t address) return phys_to_virt(swiotlb_bus_to_phys(address)); } -int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) +int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) { return 0; } @@ -315,9 +315,9 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); } -static inline int range_needs_mapping(void *ptr, size_t size) +static inline int range_needs_mapping(phys_addr_t paddr, size_t size) { - return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size); + return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); } static int is_swiotlb_buffer(char *addr) @@ -653,7 +653,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(ptr, size)) + !range_needs_mapping(virt_to_phys(ptr), size)) return dev_addr; /* @@ -804,7 +804,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, void *addr = sg_virt(sg); dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, addr); - if (range_needs_mapping(addr, sg->length) || + if (range_needs_mapping(sg_phys(sg), sg->length) || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); -- cgit v1.2.3 From 42bb8cc5e81028e217105299001070d57eb84ad7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Jan 2009 12:17:40 -0800 Subject: x86: hpet: allow force enable on ICH10 HPET Intel "Smackover" x58 BIOS don't have HPET enabled in the BIOS, so allow to force enable it at least. The register layout is the same as in other recent ICHs, so all the code can be reused. Using numerical PCI-ID because it's unlikely the PCI-ID will be used anywhere else. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 309949e9e1c1..697d1b78cfbf 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -172,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, ich_force_enable_hpet); - +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */ + ich_force_enable_hpet); static struct pci_dev *cached_dev; -- cgit v1.2.3 From b041cf22ddb742874040d0a3259d0be46f3d3a4b Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 23 Jan 2009 10:56:16 +0000 Subject: x86: rename arch/x86/kernel/pci-swiotlb_64.c => pci-swiotlb.c The file is used for 32 and 64 bit since: commit cfb80c9eae8c7ed8f2ee81090062d15ead51cbe8 Author: Jeremy Fitzhardinge Date: Tue Dec 16 12:17:36 2008 -0800 x86: unify pci iommu setup and allow swiotlb to compile for 32 bit Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/pci-swiotlb.c | 84 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/pci-swiotlb_64.c | 84 ---------------------------------------- 3 files changed, 85 insertions(+), 85 deletions(-) create mode 100644 arch/x86/kernel/pci-swiotlb.c delete mode 100644 arch/x86/kernel/pci-swiotlb_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d364df03c1d6..bb1eef62d8f0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -109,7 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o -obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 +obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o ### # 64 bit specific files diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c new file mode 100644 index 000000000000..34f12e9996ed --- /dev/null +++ b/arch/x86/kernel/pci-swiotlb.c @@ -0,0 +1,84 @@ +/* Glue code to lib/swiotlb.c */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +int swiotlb __read_mostly; + +void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) +{ + return alloc_bootmem_low_pages(size); +} + +void *swiotlb_alloc(unsigned order, unsigned long nslabs) +{ + return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); +} + +dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) +{ + return paddr; +} + +phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) +{ + return baddr; +} + +int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) +{ + return 0; +} + +static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + void *vaddr; + + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); + if (vaddr) + return vaddr; + + return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); +} + +struct dma_map_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc_coherent = x86_swiotlb_alloc_coherent, + .free_coherent = swiotlb_free_coherent, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, + .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .dma_supported = NULL, +}; + +void __init pci_swiotlb_init(void) +{ + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ +#ifdef CONFIG_X86_64 + if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) + swiotlb = 1; +#endif + if (swiotlb_force) + swiotlb = 1; + if (swiotlb) { + printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_init(); + dma_ops = &swiotlb_dma_ops; + } +} diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c deleted file mode 100644 index 34f12e9996ed..000000000000 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ /dev/null @@ -1,84 +0,0 @@ -/* Glue code to lib/swiotlb.c */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -int swiotlb __read_mostly; - -void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - -void *swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) -{ - return baddr; -} - -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - -static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) -{ - void *vaddr; - - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); - if (vaddr) - return vaddr; - - return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); -} - -struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = x86_swiotlb_alloc_coherent, - .free_coherent = swiotlb_free_coherent, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, - .sync_single_range_for_device = swiotlb_sync_single_range_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = NULL, -}; - -void __init pci_swiotlb_init(void) -{ - /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 - if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) - swiotlb = 1; -#endif - if (swiotlb_force) - swiotlb = 1; - if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); - dma_ops = &swiotlb_dma_ops; - } -} -- cgit v1.2.3 From 712406a6bf59ebf4a00358bb59a4a2a1b2953d90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Feb 2009 10:54:03 -0800 Subject: tracing/function-graph-tracer: make arch generic push pop functions There is nothing really arch specific of the push and pop functions used by the function graph tracer. This patch moves them to generic code. Acked-by: Frederic Weisbecker Acked-by: Ingo Molnar Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 25 ------------ arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/ftrace.c | 75 +----------------------------------- include/linux/ftrace.h | 24 ++++++++++++ kernel/trace/trace_functions_graph.c | 75 ++++++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 99 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b55b4a7fbefd..db24c2278be0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -55,29 +55,4 @@ struct dyn_arch_ftrace { #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -#ifndef __ASSEMBLY__ - -/* - * Stack of return addresses for functions - * of a thread. - * Used in struct thread_info - */ -struct ftrace_ret_stack { - unsigned long ret; - unsigned long func; - unsigned long long calltime; -}; - -/* - * Primary handler of a function return. - * It relays on ftrace_return_to_handler. - * Defined in entry_32/64.S - */ -extern void return_to_handler(void); - -#endif /* __ASSEMBLY__ */ -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f8661..c0852291b623 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 231bdd3c5b1c..76f7141e0f91 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -389,79 +389,6 @@ void ftrace_nmi_exit(void) #endif /* !CONFIG_DYNAMIC_FTRACE */ -/* Add a function return address to the trace stack on thread info.*/ -static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth) -{ - int index; - - if (!current->ret_stack) - return -EBUSY; - - /* The return trace stack is full */ - if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { - atomic_inc(¤t->trace_overrun); - return -EBUSY; - } - - index = ++current->curr_ret_stack; - barrier(); - current->ret_stack[index].ret = ret; - current->ret_stack[index].func = func; - current->ret_stack[index].calltime = time; - *depth = index; - - return 0; -} - -/* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) -{ - int index; - - index = current->curr_ret_stack; - - if (unlikely(index < 0)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic, otherwise we have no where to go */ - *ret = (unsigned long)panic; - return; - } - - *ret = current->ret_stack[index].ret; - trace->func = current->ret_stack[index].func; - trace->calltime = current->ret_stack[index].calltime; - trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = index; - barrier(); - current->curr_ret_stack--; - -} - -/* - * Send the trace to the ring-buffer. - * @return the original return address. - */ -unsigned long ftrace_return_to_handler(void) -{ - struct ftrace_graph_ret trace; - unsigned long ret; - - pop_return_trace(&trace, &ret); - trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_graph_return(&trace); - - if (unlikely(!ret)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic. What else to do? */ - ret = (unsigned long)panic; - } - - return ret; -} - /* * Hook the return address and push it in the stack of return addrs * in current thread info. @@ -521,7 +448,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) calltime = cpu_clock(raw_smp_processor_id()); - if (push_return_trace(old, calltime, + if (ftrace_push_return_trace(old, calltime, self_addr, &trace.depth) == -EBUSY) { *parent = old; return; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 677432b9cb7e..a7f8134c594e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -379,6 +379,30 @@ struct ftrace_graph_ret { #ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Stack of return addresses for functions + * of a thread. + * Used in struct thread_info + */ +struct ftrace_ret_stack { + unsigned long ret; + unsigned long func; + unsigned long long calltime; +}; + +/* + * Primary handler of a function return. + * It relays on ftrace_return_to_handler. + * Defined in entry_32/64.S + */ +extern void return_to_handler(void); + +extern int +ftrace_push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func, int *depth); +extern void +ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); + /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 930c08e5b38e..dce71a5b51bc 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -42,6 +42,81 @@ static struct tracer_flags tracer_flags = { /* pid on the last trace processed */ static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; +/* Add a function return address to the trace stack on thread info.*/ +int +ftrace_push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func, int *depth) +{ + int index; + + if (!current->ret_stack) + return -EBUSY; + + /* The return trace stack is full */ + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(¤t->trace_overrun); + return -EBUSY; + } + + index = ++current->curr_ret_stack; + barrier(); + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; + *depth = index; + + return 0; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +void +ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) +{ + int index; + + index = current->curr_ret_stack; + + if (unlikely(index < 0)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic, otherwise we have no where to go */ + *ret = (unsigned long)panic; + return; + } + + *ret = current->ret_stack[index].ret; + trace->func = current->ret_stack[index].func; + trace->calltime = current->ret_stack[index].calltime; + trace->overrun = atomic_read(¤t->trace_overrun); + trace->depth = index; + barrier(); + current->curr_ret_stack--; + +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_graph_ret trace; + unsigned long ret; + + ftrace_pop_return_trace(&trace, &ret); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_graph_return(&trace); + + if (unlikely(!ret)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + ret = (unsigned long)panic; + } + + return ret; +} + static int graph_trace_init(struct trace_array *tr) { int cpu, ret; -- cgit v1.2.3 From b98103a5597b87211a1c74077b06faeac554bedc Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 21 Feb 2009 00:09:47 +0100 Subject: x86: hpet: print HPET registers during setup (if hpet=verbose is used) Signed-off-by: Andreas Herrmann Cc: Mark Hounschell Cc: Borislav Petkov Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 +++- arch/x86/kernel/hpet.c | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b182626739ea..01379a822646 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -492,10 +492,12 @@ and is between 256 and 4096 characters. It is defined in the file Default: 64 hpet= [X86-32,HPET] option to control HPET usage - Format: { enable (default) | disable | force } + Format: { enable (default) | disable | force | + verbose } disable: disable HPET and use PIT instead force: allow force enabled of undocumented chips (ICH4, VIA, nVidia) + verbose: show contents of HPET registers during setup com20020= [HW,NET] ARCnet - COM20020 chipset Format: diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a00545fe5cdd..1d86ca3c1f97 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void) */ static int boot_hpet_disable; int hpet_force_user; +static int hpet_verbose; static int __init hpet_setup(char *str) { @@ -88,6 +89,8 @@ static int __init hpet_setup(char *str) boot_hpet_disable = 1; if (!strncmp("force", str, 5)) hpet_force_user = 1; + if (!strncmp("verbose", str, 7)) + hpet_verbose = 1; } return 1; } @@ -119,6 +122,43 @@ int is_hpet_enabled(void) } EXPORT_SYMBOL_GPL(is_hpet_enabled); +static void _hpet_print_config(const char *function, int line) +{ + u32 i, timers, l, h; + printk(KERN_INFO "hpet: %s(%d):\n", function, line); + l = hpet_readl(HPET_ID); + h = hpet_readl(HPET_PERIOD); + timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; + printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); + l = hpet_readl(HPET_CFG); + h = hpet_readl(HPET_STATUS); + printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); + l = hpet_readl(HPET_COUNTER); + h = hpet_readl(HPET_COUNTER+4); + printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + + for (i = 0; i < timers; i++) { + l = hpet_readl(HPET_Tn_CFG(i)); + h = hpet_readl(HPET_Tn_CFG(i)+4); + printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", + i, l, h); + l = hpet_readl(HPET_Tn_CMP(i)); + h = hpet_readl(HPET_Tn_CMP(i)+4); + printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", + i, l, h); + l = hpet_readl(HPET_Tn_ROUTE(i)); + h = hpet_readl(HPET_Tn_ROUTE(i)+4); + printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", + i, l, h); + } +} + +#define hpet_print_config() \ +do { \ + if (hpet_verbose) \ + _hpet_print_config(__FUNCTION__, __LINE__); \ +} while (0) + /* * When the hpet driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. @@ -282,6 +322,7 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_writel(cmp, HPET_Tn_CMP(timer)); udelay(1); hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_print_config(); break; case CLOCK_EVT_MODE_ONESHOT: @@ -308,6 +349,7 @@ static void hpet_set_mode(enum clock_event_mode mode, irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } + hpet_print_config(); break; } } @@ -526,6 +568,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); num_timers++; /* Value read out starts from 0 */ + hpet_print_config(); hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); if (!hpet_devs) @@ -793,6 +836,7 @@ int __init hpet_enable(void) * information and the number of channels */ id = hpet_readl(HPET_ID); + hpet_print_config(); #ifdef CONFIG_HPET_EMULATE_RTC /* @@ -845,6 +889,7 @@ static __init int hpet_late_init(void) return -ENODEV; hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + hpet_print_config(); for_each_online_cpu(cpu) { hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); -- cgit v1.2.3 From 8d6f0c8214928f7c5083dd54ecb69c5d615b516e Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 21 Feb 2009 00:10:44 +0100 Subject: x86: hpet: provide separate functions to stop and start the counter By splitting up existing hpet_start_counter function. Signed-off-by: Andreas Herrmann Cc: Mark Hounschell Cc: Borislav Petkov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1d86ca3c1f97..7ae1f1e8b9b7 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -231,27 +231,37 @@ static struct clock_event_device hpet_clockevent = { .rating = 50, }; -static void hpet_start_counter(void) +static void hpet_stop_counter(void) { unsigned long cfg = hpet_readl(HPET_CFG); - cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); +} + +static void hpet_start_counter(void) +{ + unsigned long cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } +static void hpet_restart_counter(void) +{ + hpet_stop_counter(); + hpet_start_counter(); +} + static void hpet_resume_device(void) { force_hpet_resume(); } -static void hpet_restart_counter(void) +static void hpet_resume_counter(void) { hpet_resume_device(); - hpet_start_counter(); + hpet_restart_counter(); } static void hpet_enable_legacy_int(void) @@ -738,7 +748,7 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_restart_counter, + .resume = hpet_resume_counter, #ifdef CONFIG_X86_64 .vread = vread_hpet, #endif @@ -750,7 +760,7 @@ static int hpet_clocksource_register(void) cycle_t t1; /* Start the counter */ - hpet_start_counter(); + hpet_restart_counter(); /* Verify whether hpet counter works */ t1 = read_hpet(); -- cgit v1.2.3 From c23e253e67c9d8a91a0ffa33c1f571a17f0a2403 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 21 Feb 2009 00:16:35 +0100 Subject: x86: hpet: stop HPET_COUNTER when programming periodic mode Impact: fix system hang on some systems operating with HZ_1000 On a system that stalled with HZ_1000, the first value written to T0_CMP (when the main counter was not stopped) did not trigger an interrupt. Instead after the main counter wrapped around (after several minutes) an interrupt was triggered and afterwards the periodic interrupt took effect. This can be fixed by implementing HPET spec recommendation for programming the periodic mode (i.e. stopping the main counter). Signed-off-by: Andreas Herrmann Cc: Mark Hounschell Cc: Borislav Petkov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 7ae1f1e8b9b7..648b3a2a3a44 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -309,29 +309,22 @@ static int hpet_setup_msi_irq(unsigned int irq); static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { - unsigned long cfg, cmp, now; + unsigned long cfg; uint64_t delta; switch (mode) { case CLOCK_EVT_MODE_PERIODIC: + hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); /* Make sure we use edge triggered interrupts */ cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); - /* - * The first write after writing TN_SETVAL to the - * config register sets the counter value, the second - * write sets the period. - */ - hpet_writel(cmp, HPET_Tn_CMP(timer)); - udelay(1); hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_start_counter(); hpet_print_config(); break; -- cgit v1.2.3 From fff78ad5cee1d6f695103ec590cbd2a9f3c39e8c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:28:42 -0500 Subject: [CPUFREQ] Stupidly trivial CodingStyle fix GNU indent complains about this being ambiguous, because it's dumb. One of my automated tests relies on the output of indent, so this shuts it up. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 1b446d79a8fd..9cd73d157437 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -94,7 +94,7 @@ static struct cpufreq_frequency_table *powernow_table; static unsigned int can_scale_bus; static unsigned int can_scale_vid; -static unsigned int minimum_speed=-1; +static unsigned int minimum_speed = -1; static unsigned int maximum_speed; static unsigned int number_scales; static unsigned int fsb; -- cgit v1.2.3 From b5c916666240032b29f73a1ca52c3e0fac37335c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:39:47 -0500 Subject: [CPUFREQ] checkpatch cleanups for cpufreq-nforce2 Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 38 +++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 965ea52767ac..ad8284f0e865 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -32,7 +32,7 @@ * nforce2_chipset: * FSB is changed using the chipset */ -static struct pci_dev *nforce2_chipset_dev; +static struct pci_dev *nforce2_dev; /* fid: * multiplier * 10 @@ -56,7 +56,8 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "cpufreq-nforce2", msg) /** * nforce2_calc_fsb - calculate FSB @@ -118,11 +119,11 @@ static void nforce2_write_pll(int pll) int temp; /* Set the pll addr. to 0x00 */ - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); + pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0); /* Now write the value in all 64 registers */ for (temp = 0; temp <= 0x3f; temp++) - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); + pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll); return; } @@ -139,8 +140,8 @@ static unsigned int nforce2_fsb_read(int bootfsb) u32 fsb, temp = 0; /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ - nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); + nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (!nforce2_sub5) return 0; @@ -148,13 +149,13 @@ static unsigned int nforce2_fsb_read(int bootfsb) fsb /= 1000000; /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (bootfsb || !temp) return fsb; /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; @@ -185,7 +186,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -197,7 +198,7 @@ static int nforce2_set_fsb(unsigned int fsb) /* Enable write access */ temp = 0x01; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); + pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp); diff = tfsb - fsb; @@ -222,7 +223,7 @@ static int nforce2_set_fsb(unsigned int fsb) } temp = 0x40; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); + pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp); return 0; } @@ -244,7 +245,8 @@ static unsigned int nforce2_get(unsigned int cpu) * nforce2_target - set a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * Sets a new CPUFreq policy. */ @@ -328,7 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) if (!fid) { if (!cpu_khz) { printk(KERN_WARNING - "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); + "cpufreq: cpu_khz not set, " + "can't calculate multiplier!\n"); return -ENODEV; } @@ -392,17 +395,18 @@ static struct cpufreq_driver nforce2_driver = { */ static unsigned int nforce2_detect_chipset(void) { - nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, PCI_ANY_ID, PCI_ANY_ID, NULL); - if (nforce2_chipset_dev == NULL) + if (nforce2_dev == NULL) return -ENODEV; printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", - nforce2_chipset_dev->revision); + nforce2_dev->revision); printk(KERN_INFO - "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); + "cpufreq: FSB changing is maybe unstable and can lead to " + "crashes and data loss.\n"); return 0; } -- cgit v1.2.3 From 20174b65d9fdc8dddef3d2ab9647e01fdab13064 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:42:19 -0500 Subject: [CPUFREQ] nforce2: Use driver prefix, not cpufreq prefix. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index ad8284f0e865..99262906838c 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -56,6 +56,7 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); +#define PFX "cpufreq-nforce2: " #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ "cpufreq-nforce2", msg) @@ -175,13 +176,13 @@ static int nforce2_set_fsb(unsigned int fsb) int pll = 0; if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { - printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); + printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb); return -EINVAL; } tfsb = nforce2_fsb_read(0); if (!tfsb) { - printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); + printk(KERN_ERR PFX "Error while reading the FSB\n"); return -EINVAL; } @@ -278,7 +279,7 @@ static int nforce2_target(struct cpufreq_policy *policy, /* local_irq_save(flags); */ if (nforce2_set_fsb(target_fsb) < 0) - printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", + printk(KERN_ERR PFX "Changing FSB to %d failed\n", target_fsb); else dprintk("Changed FSB successfully to %d\n", @@ -329,9 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* FIX: Get FID from CPU */ if (!fid) { if (!cpu_khz) { - printk(KERN_WARNING - "cpufreq: cpu_khz not set, " - "can't calculate multiplier!\n"); + printk(KERN_WARNING PFX + "cpu_khz not set, can't calculate multiplier!\n"); return -ENODEV; } @@ -346,7 +346,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) } } - printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, + printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb, fid / 10, fid % 10); /* Set maximum FSB to FSB at boot time */ @@ -402,10 +402,10 @@ static unsigned int nforce2_detect_chipset(void) if (nforce2_dev == NULL) return -ENODEV; - printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", + printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n", nforce2_dev->revision); - printk(KERN_INFO - "cpufreq: FSB changing is maybe unstable and can lead to " + printk(KERN_INFO PFX + "FSB changing is maybe unstable and can lead to " "crashes and data loss.\n"); return 0; @@ -424,7 +424,7 @@ static int __init nforce2_init(void) /* detect chipset */ if (nforce2_detect_chipset()) { - printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); + printk(KERN_ERR PFX "No nForce2 chipset.\n"); return -ENODEV; } -- cgit v1.2.3 From 04cd1a99dc22bcaa1eccbe8f8386df8c1295e979 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:50:33 -0500 Subject: [CPUFREQ] checkpatch cleanups for elanfreq The remaining warning about the simple_strtoul conversion to strict_strtoul seems kind of pointless to me. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index fe613c93b366..006b278b0d5d 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -184,7 +184,8 @@ static int elanfreq_target(struct cpufreq_policy *policy, { unsigned int newstate = 0; - if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], + target_freq, relation, &newstate)) return -EINVAL; elanfreq_set_cpu_state(newstate); @@ -301,7 +302,8 @@ static void __exit elanfreq_exit(void) module_param(max_freq, int, 0444); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); +MODULE_AUTHOR("Robert Schwebel , " + "Sven Geggus "); MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); module_init(elanfreq_init); -- cgit v1.2.3 From c9b8c871525aeda153494996d84a832270205d81 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:54:47 -0500 Subject: [CPUFREQ] checkpatch cleanups for e_powersaver Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c2f930d86640..3f83ea12c47a 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -12,12 +12,12 @@ #include #include #include +#include +#include +#include #include #include -#include -#include -#include #define EPS_BRAND_C7M 0 #define EPS_BRAND_C7 1 @@ -184,7 +184,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy) break; } - switch(brand) { + switch (brand) { case EPS_BRAND_C7M: printk(KERN_CONT "C7-M\n"); break; @@ -218,17 +218,20 @@ static int eps_cpu_init(struct cpufreq_policy *policy) /* Print voltage and multiplier */ rdmsr(MSR_IA32_PERF_STATUS, lo, hi); current_voltage = lo & 0xff; - printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700); + printk(KERN_INFO "eps: Current voltage = %dmV\n", + current_voltage * 16 + 700); current_multiplier = (lo >> 8) & 0xff; printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); /* Print limits */ max_voltage = hi & 0xff; - printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); + printk(KERN_INFO "eps: Highest voltage = %dmV\n", + max_voltage * 16 + 700); max_multiplier = (hi >> 8) & 0xff; printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); min_voltage = (hi >> 16) & 0xff; - printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); + printk(KERN_INFO "eps: Lowest voltage = %dmV\n", + min_voltage * 16 + 700); min_multiplier = (hi >> 24) & 0xff; printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); @@ -318,7 +321,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy) return 0; } -static struct freq_attr* eps_attr[] = { +static struct freq_attr *eps_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -356,7 +359,7 @@ static void __exit eps_exit(void) cpufreq_unregister_driver(&eps_driver); } -MODULE_AUTHOR("Rafa³ Bilski "); +MODULE_AUTHOR("Rafal Bilski "); MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 00f6a235bf241e62dfadf32b4322309e65c7b177 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:06:57 -0500 Subject: [CPUFREQ] checkpatch cleanups for gx-suspmod Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 105 +++++++++++++++++++------------ 1 file changed, 65 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 9d9eae82e60f..ac27ec2264d5 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -79,8 +79,9 @@ #include #include #include +#include + #include -#include /* PCI config registers, all at F0 */ #define PCI_PMER1 0x80 /* power management enable register 1 */ @@ -122,8 +123,8 @@ static struct gxfreq_params *gx_params; static int stock_freq; /* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ -static int pci_busclk = 0; -module_param (pci_busclk, int, 0444); +static int pci_busclk; +module_param(pci_busclk, int, 0444); /* maximum duration for which the cpu may be suspended * (32us * MAX_DURATION). If no parameter is given, this defaults @@ -132,7 +133,7 @@ module_param (pci_busclk, int, 0444); * is suspended -- processing power is just 0.39% of what it used to be, * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ static int max_duration = 255; -module_param (max_duration, int, 0444); +module_param(max_duration, int, 0444); /* For the default policy, we want at least some processing power * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) @@ -140,7 +141,8 @@ module_param (max_duration, int, 0444); #define POLICY_MIN_DIV 20 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "gx-suspmod", msg) /** * we can detect a core multipiler from dir0_lsb @@ -166,12 +168,20 @@ static int gx_freq_mult[16] = { * Low Level chipset interface * ****************************************************************/ static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, + PCI_ANY_ID, PCI_ANY_ID }, { 0, }, }; +static void gx_write_byte(int reg, int value) +{ + pci_write_config_byte(gx_params->cs55x0, reg, value); +} + /** * gx_detect_chipset: * @@ -200,7 +210,8 @@ static __init struct pci_dev *gx_detect_chipset(void) /** * gx_get_cpuspeed: * - * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. + * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi + * Geode CPU runs. */ static unsigned int gx_get_cpuspeed(unsigned int cpu) { @@ -217,17 +228,18 @@ static unsigned int gx_get_cpuspeed(unsigned int cpu) * **/ -static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) +static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, + u8 *off_duration) { unsigned int i; u8 tmp_on, tmp_off; int old_tmp_freq = stock_freq; int tmp_freq; - *off_duration=1; - *on_duration=0; + *off_duration = 1; + *on_duration = 0; - for (i=max_duration; i>0; i--) { + for (i = max_duration; i > 0; i--) { tmp_off = ((khz * i) / stock_freq) & 0xff; tmp_on = i - tmp_off; tmp_freq = (stock_freq * tmp_off) / i; @@ -259,26 +271,34 @@ static void gx_set_cpuspeed(unsigned int khz) freqs.cpu = 0; freqs.old = gx_get_cpuspeed(0); - new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); + new_khz = gx_validate_speed(khz, &gx_params->on_duration, + &gx_params->off_duration); freqs.new = new_khz; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); local_irq_save(flags); - if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ + + + if (new_khz != stock_freq) { + /* if new khz == 100% of CPU speed, it is special case */ switch (gx_params->cs55x0->device) { case PCI_DEVICE_ID_CYRIX_5530_LEGACY: pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; /* FIXME: need to test other values -- Zwane,Miura */ - pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - - if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ - suscfg = gx_params->pci_suscfg | SUSMOD; - } else { /* CS5530A,B.. */ - suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + /* typical 2 to 4ms */ + gx_write_byte(PCI_IRQTC, 4); + /* typical 50 to 100ms */ + gx_write_byte(PCI_VIDTC, 100); + gx_write_byte(PCI_PMER1, pmer1); + + if (gx_params->cs55x0->revision < 0x10) { + /* CS5530(rev 1.2, 1.3) */ + suscfg = gx_params->pci_suscfg|SUSMOD; + } else { + /* CS5530A,B.. */ + suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE; } break; case PCI_DEVICE_ID_CYRIX_5520: @@ -294,13 +314,13 @@ static void gx_set_cpuspeed(unsigned int khz) suscfg = gx_params->pci_suscfg & ~(SUSMOD); gx_params->off_duration = 0; gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); + dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); } - pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); + gx_write_byte(PCI_MODOFF, gx_params->off_duration); + gx_write_byte(PCI_MODON, gx_params->on_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + gx_write_byte(PCI_SUSCFG, suscfg); pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); local_irq_restore(flags); @@ -334,7 +354,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) return -EINVAL; policy->cpu = 0; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), + stock_freq); /* it needs to be assured that at least one supported frequency is * within policy->min and policy->max. If it is not, policy->max @@ -354,7 +375,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) policy->max = tmp_freq; if (policy->max < policy->min) policy->max = policy->min; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), + stock_freq); return 0; } @@ -398,18 +420,18 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) return -ENODEV; /* determine maximum frequency */ - if (pci_busclk) { + if (pci_busclk) maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } else if (cpu_khz) { + else if (cpu_khz) maxfreq = cpu_khz; - } else { + else maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } + stock_freq = maxfreq; curfreq = gx_get_cpuspeed(0); dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n",curfreq); + dprintk("cpu current frequency is %dkHz.\n", curfreq); /* setup basic struct for cpufreq API */ policy->cpu = 0; @@ -447,7 +469,8 @@ static int __init cpufreq_gx_init(void) struct pci_dev *gx_pci; /* Test if we have the right hardware */ - if ((gx_pci = gx_detect_chipset()) == NULL) + gx_pci = gx_detect_chipset(); + if (gx_pci == NULL) return -ENODEV; /* check whether module parameters are sane */ @@ -468,9 +491,11 @@ static int __init cpufreq_gx_init(void) pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); - pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); + pci_read_config_byte(params->cs55x0, PCI_MODOFF, + &(params->off_duration)); - if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + ret = cpufreq_register_driver(&gx_suspmod_driver); + if (ret) { kfree(params); return ret; /* register error! */ } @@ -485,9 +510,9 @@ static void __exit cpufreq_gx_exit(void) kfree(gx_params); } -MODULE_AUTHOR ("Hiroshi Miura "); -MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Hiroshi Miura "); +MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); +MODULE_LICENSE("GPL"); module_init(cpufreq_gx_init); module_exit(cpufreq_gx_exit); -- cgit v1.2.3 From ac617bd0f7b959dc6708ad2f0d6b9dcf4382f1ed Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:29:53 -0500 Subject: [CPUFREQ] checkpatch cleanups for longhaul Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 182 +++++++++++++++++---------------- arch/x86/kernel/cpu/cpufreq/longhaul.h | 12 +-- 2 files changed, 100 insertions(+), 94 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index a4cff5d6e380..dc03622a83a0 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -30,12 +30,12 @@ #include #include #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include #include "longhaul.h" @@ -58,7 +58,7 @@ #define USE_NORTHBRIDGE (1 << 2) static int cpu_model; -static unsigned int numscales=16; +static unsigned int numscales = 16; static unsigned int fsb; static const struct mV_pos *vrm_mV_table; @@ -67,8 +67,8 @@ static const unsigned char *mV_vrm_table; static unsigned int highest_speed, lowest_speed; /* kHz */ static unsigned int minmult, maxmult; static int can_scale_voltage; -static struct acpi_processor *pr = NULL; -static struct acpi_processor_cx *cx = NULL; +static struct acpi_processor *pr; +static struct acpi_processor_cx *cx; static u32 acpi_regs_addr; static u8 longhaul_flags; static unsigned int longhaul_index; @@ -78,12 +78,13 @@ static int scale_voltage; static int disable_acpi_c3; static int revid_errata; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "longhaul", msg) /* Clock ratios multiplied by 10 */ -static int clock_ratio[32]; -static int eblcr_table[32]; +static int mults[32]; +static int eblcr[32]; static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; @@ -93,7 +94,7 @@ static char speedbuffer[8]; static char *print_speed(int speed) { if (speed < 1000) { - snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); + snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed); return speedbuffer; } @@ -122,27 +123,28 @@ static unsigned int calc_speed(int mult) static int longhaul_get_cpu_mult(void) { - unsigned long invalue=0,lo, hi; + unsigned long invalue = 0, lo, hi; - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; - if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { + rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi); + invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22; + if (longhaul_version == TYPE_LONGHAUL_V2 || + longhaul_version == TYPE_POWERSAVER) { if (lo & (1<<27)) - invalue+=16; + invalue += 16; } - return eblcr_table[invalue]; + return eblcr[invalue]; } /* For processor with BCR2 MSR */ -static void do_longhaul1(unsigned int clock_ratio_index) +static void do_longhaul1(unsigned int mults_index) { union msr_bcr2 bcr2; rdmsrl(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; + bcr2.bits.CLOCKMUL = mults_index & 0xff; /* Sync to timer tick */ safe_halt(); @@ -161,7 +163,7 @@ static void do_longhaul1(unsigned int clock_ratio_index) /* For processor with Longhaul MSR */ -static void do_powersaver(int cx_address, unsigned int clock_ratio_index, +static void do_powersaver(int cx_address, unsigned int mults_index, unsigned int dir) { union msr_longhaul longhaul; @@ -173,11 +175,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index, longhaul.bits.RevisionKey = longhaul.bits.RevisionID; else longhaul.bits.RevisionKey = 0; - longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; - longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + longhaul.bits.SoftBusRatio = mults_index & 0xf; + longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4; /* Setup new voltage */ if (can_scale_voltage) - longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; + longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f; /* Sync to timer tick */ safe_halt(); /* Raise voltage if necessary */ @@ -240,14 +242,14 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index, /** * longhaul_set_cpu_frequency() - * @clock_ratio_index : bitpattern of the new multiplier. + * @mults_index : bitpattern of the new multiplier. * * Sets a new clock ratio. */ static void longhaul_setstate(unsigned int table_index) { - unsigned int clock_ratio_index; + unsigned int mults_index; int speed, mult; struct cpufreq_freqs freqs; unsigned long flags; @@ -256,9 +258,9 @@ static void longhaul_setstate(unsigned int table_index) u32 bm_timeout = 1000; unsigned int dir = 0; - clock_ratio_index = longhaul_table[table_index].index; + mults_index = longhaul_table[table_index].index; /* Safety precautions */ - mult = clock_ratio[clock_ratio_index & 0x1f]; + mult = mults[mults_index & 0x1f]; if (mult == -1) return; speed = calc_speed(mult); @@ -274,7 +276,7 @@ static void longhaul_setstate(unsigned int table_index) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); retry_loop: preempt_disable(); @@ -282,8 +284,8 @@ retry_loop: pic2_mask = inb(0xA1); pic1_mask = inb(0x21); /* works on C3. save mask. */ - outb(0xFF,0xA1); /* Overkill */ - outb(0xFE,0x21); /* TMR0 only */ + outb(0xFF, 0xA1); /* Overkill */ + outb(0xFE, 0x21); /* TMR0 only */ /* Wait while PCI bus is busy. */ if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE @@ -312,7 +314,7 @@ retry_loop: * Software controlled multipliers only. */ case TYPE_LONGHAUL_V1: - do_longhaul1(clock_ratio_index); + do_longhaul1(mults_index); break; /* @@ -327,9 +329,9 @@ retry_loop: if (longhaul_flags & USE_ACPI_C3) { /* Don't allow wakeup */ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, clock_ratio_index, dir); + do_powersaver(cx->address, mults_index, dir); } else { - do_powersaver(0, clock_ratio_index, dir); + do_powersaver(0, mults_index, dir); } break; } @@ -341,8 +343,8 @@ retry_loop: /* Enable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); } - outb(pic2_mask,0xA1); /* restore mask */ - outb(pic1_mask,0x21); + outb(pic2_mask, 0xA1); /* restore mask */ + outb(pic1_mask, 0x21); local_irq_restore(flags); preempt_enable(); @@ -392,7 +394,8 @@ retry_loop: cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); if (!bm_timeout) - printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); + printk(KERN_INFO PFX "Warning: Timeout while waiting for " + "idle PCI bus.\n"); } /* @@ -458,31 +461,32 @@ static int __init longhaul_get_ranges(void) break; } - dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", + dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", minmult/10, minmult%10, maxmult/10, maxmult%10); highest_speed = calc_speed(maxmult); lowest_speed = calc_speed(minmult); - dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, print_speed(lowest_speed/1000), print_speed(highest_speed/1000)); if (lowest_speed == highest_speed) { - printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); + printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n"); return -EINVAL; } if (lowest_speed > highest_speed) { - printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", + printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", lowest_speed, highest_speed); return -EINVAL; } - longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); - if(!longhaul_table) + longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table), + GFP_KERNEL); + if (!longhaul_table) return -ENOMEM; for (j = 0; j < numscales; j++) { - ratio = clock_ratio[j]; + ratio = mults[j]; if (ratio == -1) continue; if (ratio > maxmult || ratio < minmult) @@ -521,7 +525,7 @@ static int __init longhaul_get_ranges(void) /* Find index we are running on */ for (j = 0; j < k; j++) { - if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + if (mults[longhaul_table[j].index & 0x1f] == mult) { longhaul_index = j; break; } @@ -559,20 +563,22 @@ static void __init longhaul_setup_voltagescaling(void) maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { - printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " + printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " "Voltage scaling disabled.\n", - minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); + minvid.mV/1000, minvid.mV%1000, + maxvid.mV/1000, maxvid.mV%1000); return; } if (minvid.mV == maxvid.mV) { - printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " - "both %d.%03d. Voltage scaling disabled\n", + printk(KERN_INFO PFX "Claims to support voltage scaling but " + "min & max are both %d.%03d. " + "Voltage scaling disabled\n", maxvid.mV/1000, maxvid.mV%1000); return; } - /* How many voltage steps */ + /* How many voltage steps*/ numvscales = maxvid.pos - minvid.pos + 1; printk(KERN_INFO PFX "Max VID=%d.%03d " @@ -586,7 +592,7 @@ static void __init longhaul_setup_voltagescaling(void) j = longhaul.bits.MinMHzBR; if (longhaul.bits.MinMHzBR4) j += 16; - min_vid_speed = eblcr_table[j]; + min_vid_speed = eblcr[j]; if (min_vid_speed == -1) return; switch (longhaul.bits.MinMHzFSB) { @@ -617,7 +623,8 @@ static void __init longhaul_setup_voltagescaling(void) pos = minvid.pos; longhaul_table[j].index |= mV_vrm_table[pos] << 8; vid = vrm_mV_table[mV_vrm_table[pos]]; - printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", + speed, j, vid.mV); j++; } @@ -640,7 +647,8 @@ static int longhaul_target(struct cpufreq_policy *policy, unsigned int dir = 0; u8 vid, current_vid; - if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) + if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, + relation, &table_index)) return -EINVAL; /* Don't set same frequency again */ @@ -656,7 +664,8 @@ static int longhaul_target(struct cpufreq_policy *policy, * this in hardware, C3 is old and we need to do this * in software. */ i = longhaul_index; - current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + current_vid = (longhaul_table[longhaul_index].index >> 8); + current_vid &= 0x1f; if (table_index > longhaul_index) dir = 1; while (i != table_index) { @@ -691,9 +700,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, { struct acpi_device *d; - if ( acpi_bus_get_device(obj_handle, &d) ) { + if (acpi_bus_get_device(obj_handle, &d)) return 0; - } + *return_value = acpi_driver_data(d); return 1; } @@ -750,7 +759,7 @@ static int longhaul_setup_southbridge(void) /* Find VT8235 southbridge */ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); if (dev == NULL) - /* Find VT8237 southbridge */ + /* Find VT8237 southbridge */ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, NULL); if (dev != NULL) { @@ -769,7 +778,8 @@ static int longhaul_setup_southbridge(void) if (pci_cmd & 1 << 7) { pci_read_config_dword(dev, 0x88, &acpi_regs_addr); acpi_regs_addr &= 0xff00; - printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", + acpi_regs_addr); } pci_dev_put(dev); @@ -781,7 +791,7 @@ static int longhaul_setup_southbridge(void) static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *c = &cpu_data(0); - char *cpuname=NULL; + char *cpuname = NULL; int ret; u32 lo, hi; @@ -791,8 +801,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpu_model = CPU_SAMUEL; cpuname = "C3 'Samuel' [C5A]"; longhaul_version = TYPE_LONGHAUL_V1; - memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); - memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); + memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); + memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr)); break; case 7: @@ -803,10 +813,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpuname = "C3 'Samuel 2' [C5B]"; /* Note, this is not a typo, early Samuel2's had * Samuel1 ratios. */ - memcpy(clock_ratio, samuel1_clock_ratio, - sizeof(samuel1_clock_ratio)); - memcpy(eblcr_table, samuel2_eblcr, - sizeof(samuel2_eblcr)); + memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); + memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); break; case 1 ... 15: longhaul_version = TYPE_LONGHAUL_V1; @@ -817,10 +825,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpu_model = CPU_EZRA; cpuname = "C3 'Ezra' [C5C]"; } - memcpy(clock_ratio, ezra_clock_ratio, - sizeof(ezra_clock_ratio)); - memcpy(eblcr_table, ezra_eblcr, - sizeof(ezra_eblcr)); + memcpy(mults, ezra_mults, sizeof(ezra_mults)); + memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr)); break; } break; @@ -829,18 +835,16 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpu_model = CPU_EZRA_T; cpuname = "C3 'Ezra-T' [C5M]"; longhaul_version = TYPE_POWERSAVER; - numscales=32; - memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); - memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); + numscales = 32; + memcpy(mults, ezrat_mults, sizeof(ezrat_mults)); + memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr)); break; case 9: longhaul_version = TYPE_POWERSAVER; numscales = 32; - memcpy(clock_ratio, - nehemiah_clock_ratio, - sizeof(nehemiah_clock_ratio)); - memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); + memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); + memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); switch (c->x86_mask) { case 0 ... 1: cpu_model = CPU_NEHEMIAH; @@ -869,14 +873,14 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) longhaul_version = TYPE_LONGHAUL_V1; } - printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); + printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname); switch (longhaul_version) { case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - printk ("Longhaul v%d supported.\n", longhaul_version); + printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version); break; case TYPE_POWERSAVER: - printk ("Powersaver supported.\n"); + printk(KERN_CONT "Powersaver supported.\n"); break; }; @@ -940,7 +944,7 @@ static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) return 0; } -static struct freq_attr* longhaul_attr[] = { +static struct freq_attr *longhaul_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -966,13 +970,15 @@ static int __init longhaul_init(void) #ifdef CONFIG_SMP if (num_online_cpus() > 1) { - printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + printk(KERN_ERR PFX "More than 1 CPU detected, " + "longhaul disabled.\n"); return -ENODEV; } #endif #ifdef CONFIG_X86_IO_APIC if (cpu_has_apic) { - printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + printk(KERN_ERR PFX "APIC detected. Longhaul is currently " + "broken in this configuration.\n"); return -ENODEV; } #endif @@ -993,8 +999,8 @@ static void __exit longhaul_exit(void) { int i; - for (i=0; i < numscales; i++) { - if (clock_ratio[i] == maxmult) { + for (i = 0; i < numscales; i++) { + if (mults[i] == maxmult) { longhaul_setstate(i); break; } @@ -1007,11 +1013,11 @@ static void __exit longhaul_exit(void) /* Even if BIOS is exporting ACPI C3 state, and it is used * with success when CPU is idle, this state doesn't * trigger frequency transition in some cases. */ -module_param (disable_acpi_c3, int, 0644); +module_param(disable_acpi_c3, int, 0644); MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); /* Change CPU voltage with frequency. Very usefull to save * power, but most VIA C3 processors aren't supporting it. */ -module_param (scale_voltage, int, 0644); +module_param(scale_voltage, int, 0644); MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); /* Force revision key to 0 for processors which doesn't * support voltage scaling, but are introducing itself as @@ -1019,9 +1025,9 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(revid_errata, int, 0644); MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dave Jones "); +MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors."); +MODULE_LICENSE("GPL"); late_initcall(longhaul_init); module_exit(longhaul_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index 4fcc320997df..e2360a469f79 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -49,14 +49,14 @@ union msr_longhaul { /* * Clock ratio tables. Div/Mod by 10 to get ratio. - * The eblcr ones specify the ratio read from the CPU. - * The clock_ratio ones specify what to write to the CPU. + * The eblcr values specify the ratio read from the CPU. + * The mults values specify what to write to the CPU. */ /* * VIA C3 Samuel 1 & Samuel 2 (stepping 0) */ -static const int __initdata samuel1_clock_ratio[16] = { +static const int __initdata samuel1_mults[16] = { -1, /* 0000 -> RESERVED */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { /* * VIA C3 Ezra */ -static const int __initdata ezra_clock_ratio[16] = { +static const int __initdata ezra_mults[16] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { /* * VIA C3 (Ezra-T) [C5M]. */ -static const int __initdata ezrat_clock_ratio[32] = { +static const int __initdata ezrat_mults[32] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { /* * VIA C3 Nehemiah */ -static const int __initdata nehemiah_clock_ratio[32] = { +static const int __initdata nehemiah_mults[32] = { 100, /* 0000 -> 10.0x */ -1, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ -- cgit v1.2.3 From 48ee923a666d4cc54e48d55fc573c57492501122 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:32:50 -0500 Subject: [CPUFREQ] checkpatch cleanups for longrun Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longrun.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index 777a7ff075de..da5f70fcb766 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -11,12 +11,13 @@ #include #include #include +#include #include #include -#include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "longrun", msg) static struct cpufreq_driver longrun_driver; @@ -51,7 +52,7 @@ static void __init longrun_get_policy(struct cpufreq_policy *policy) msr_lo &= 0x0000007F; msr_hi &= 0x0000007F; - if ( longrun_high_freq <= longrun_low_freq ) { + if (longrun_high_freq <= longrun_low_freq) { /* Assume degenerate Longrun table */ policy->min = policy->max = longrun_high_freq; } else { @@ -79,7 +80,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy) if (!policy) return -EINVAL; - if ( longrun_high_freq <= longrun_low_freq ) { + if (longrun_high_freq <= longrun_low_freq) { /* Assume degenerate Longrun table */ pctg_lo = pctg_hi = 100; } else { @@ -152,7 +153,7 @@ static unsigned int longrun_get(unsigned int cpu) cpuid(0x80860007, &eax, &ebx, &ecx, &edx); dprintk("cpuid eax is %u\n", eax); - return (eax * 1000); + return eax * 1000; } /** @@ -196,7 +197,8 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); *high_freq = msr_lo * 1000; /* to kHz */ - dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); + dprintk("longrun table interface told %u - %u kHz\n", + *low_freq, *high_freq); if (*low_freq > *high_freq) *low_freq = *high_freq; @@ -219,7 +221,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, cpuid(0x80860007, &eax, &ebx, &ecx, &edx); /* try decreasing in 10% steps, some processors react only * on some barrier values */ - for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { + for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) { /* set to 0 to try_hi perf_pctg */ msr_lo &= 0xFFFFFF80; msr_hi &= 0xFFFFFF80; @@ -236,7 +238,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) * eqals - * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) + * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) * * high_freq * perf_pctg is stored tempoarily into "ebx". */ @@ -317,9 +319,10 @@ static void __exit longrun_exit(void) } -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and " + "Efficeon processors."); +MODULE_LICENSE("GPL"); module_init(longrun_init); module_exit(longrun_exit); -- cgit v1.2.3 From 14a6650f13b958aabc30ddd575b0902384b22457 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 18 Jan 2009 00:00:04 -0500 Subject: [CPUFREQ] checkpatch cleanups for powernow-k6 Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 44 ++++++++++++++++++------------- 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index c1ac5790c63e..f10dea409f40 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -1,6 +1,7 @@ /* * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, + * Dominik Brodowski. * * Licensed under the terms of the GNU GPL License version 2. * @@ -13,14 +14,15 @@ #include #include #include - -#include #include #include +#include + #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long as it is unused */ +#define PFX "powernow-k6: " static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -47,8 +49,8 @@ static struct cpufreq_frequency_table clock_ratio[] = { */ static int powernow_k6_get_cpu_multiplier(void) { - u64 invalue = 0; - u32 msrval; + u64 invalue = 0; + u32 msrval; msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ @@ -68,12 +70,12 @@ static int powernow_k6_get_cpu_multiplier(void) */ static void powernow_k6_set_state(unsigned int best_i) { - unsigned long outvalue = 0, invalue = 0; - unsigned long msrval; - struct cpufreq_freqs freqs; + unsigned long outvalue = 0, invalue = 0; + unsigned long msrval; + struct cpufreq_freqs freqs; if (clock_ratio[best_i].index > max_multiplier) { - printk(KERN_ERR "cpufreq: invalid target frequency\n"); + printk(KERN_ERR PFX "invalid target frequency\n"); return; } @@ -119,7 +121,8 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) * powernow_k6_setpolicy - sets a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * sets a new CPUFreq policy */ @@ -127,9 +130,10 @@ static int powernow_k6_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0; - if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &clock_ratio[0], + target_freq, relation, &newstate)) return -EINVAL; powernow_k6_set_state(newstate); @@ -140,7 +144,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy, static int powernow_k6_cpu_init(struct cpufreq_policy *policy) { - unsigned int i; + unsigned int i, f; int result; if (policy->cpu != 0) @@ -152,10 +156,11 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) /* table init */ for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { - if (clock_ratio[i].index > max_multiplier) + f = clock_ratio[i].index; + if (f > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else - clock_ratio[i].frequency = busfreq * clock_ratio[i].index; + clock_ratio[i].frequency = busfreq * f; } /* cpuinfo and default policy values */ @@ -185,7 +190,9 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) static unsigned int powernow_k6_get(unsigned int cpu) { - return busfreq * powernow_k6_get_cpu_multiplier(); + unsigned int ret; + ret = (busfreq * powernow_k6_get_cpu_multiplier()); + return ret; } static struct freq_attr *powernow_k6_attr[] = { @@ -221,7 +228,7 @@ static int __init powernow_k6_init(void) return -ENODEV; if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { - printk("cpufreq: PowerNow IOPORT region already used.\n"); + printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n"); return -EIO; } @@ -246,7 +253,8 @@ static void __exit powernow_k6_exit(void) } -MODULE_AUTHOR("Arjan van de Ven, Dave Jones , Dominik Brodowski "); +MODULE_AUTHOR("Arjan van de Ven, Dave Jones , " + "Dominik Brodowski "); MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 6072ace436800a011c3cb9a75ee49276bd90445a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 18 Jan 2009 01:27:35 -0500 Subject: [CPUFREQ] checkpatch cleanups for sc520 Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index 42da9bd677d6..435a996a613a 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -19,17 +19,19 @@ #include #include +#include +#include #include -#include -#include #define MMCR_BASE 0xfffef000 /* The default base address */ #define OFFS_CPUCTL 0x2 /* CPU Control Register */ static __u8 __iomem *cpuctl; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "sc520_freq", msg) +#define PFX "sc520_freq: " static struct cpufreq_frequency_table sc520_freq_table[] = { {0x01, 100000}, @@ -43,7 +45,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) switch (clockspeed_reg & 0x03) { default: - printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + printk(KERN_ERR PFX "error: cpuctl register has unexpected " + "value %02x\n", clockspeed_reg); case 0x01: return 100000; case 0x02: @@ -51,7 +54,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) } } -static void sc520_freq_set_cpu_state (unsigned int state) +static void sc520_freq_set_cpu_state(unsigned int state) { struct cpufreq_freqs freqs; @@ -76,18 +79,19 @@ static void sc520_freq_set_cpu_state (unsigned int state) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); }; -static int sc520_freq_verify (struct cpufreq_policy *policy) +static int sc520_freq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); } -static int sc520_freq_target (struct cpufreq_policy *policy, +static int sc520_freq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; - if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, sc520_freq_table, + target_freq, relation, &newstate)) return -EINVAL; sc520_freq_set_cpu_state(newstate); @@ -116,7 +120,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); @@ -131,7 +135,7 @@ static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) } -static struct freq_attr* sc520_freq_attr[] = { +static struct freq_attr *sc520_freq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -155,13 +159,13 @@ static int __init sc520_freq_init(void) int err; /* Test if we have the right hardware */ - if(c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) { + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { dprintk("no Elan SC520 processor found!\n"); return -ENODEV; } cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); - if(!cpuctl) { + if (!cpuctl) { printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); return -ENOMEM; } -- cgit v1.2.3 From bbfebd66554b934b270c4c49442f4fe5e62df0e5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:55:22 -0500 Subject: [CPUFREQ] checkpatch cleanups for speedstep related drivers. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 60 +++++----- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 70 ++++++------ arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 129 ++++++++++++--------- arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 18 +-- arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 166 +++++++++++++++++----------- 5 files changed, 258 insertions(+), 185 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b585e04cbc9e..46a2a7a53148 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -27,15 +27,16 @@ #include #include #include +#include #include #include -#include #include "speedstep-lib.h" #define PFX "p4-clockmod: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "p4-clockmod", msg) /* * Duty Cycle (3bits), note DC_DISABLE is not specified in @@ -58,7 +59,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) { u32 l, h; - if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) + if (!cpu_online(cpu) || + (newstate > DC_DISABLE) || (newstate == DC_RESV)) return -EINVAL; rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); @@ -66,7 +68,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) if (l & 0x01) dprintk("CPU#%d currently thermal throttled\n", cpu); - if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) + if (has_N44_O17_errata[cpu] && + (newstate == DC_25PT || newstate == DC_DFLT)) newstate = DC_38PT; rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); @@ -112,7 +115,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int i; - if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], + target_freq, relation, &newstate)) return -EINVAL; freqs.old = cpufreq_p4_get(policy->cpu); @@ -127,7 +131,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software + /* run on each logical CPU, + * see section 13.15.3 of IA32 Intel Architecture Software * Developer's Manual, Volume 3 */ for_each_cpu(i, policy->cpus) @@ -153,28 +158,30 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) { if (c->x86 == 0x06) { if (cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " - "The acpi-cpufreq module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); + printk(KERN_WARNING PFX "Warning: EST-capable CPU " + "detected. The acpi-cpufreq module offers " + "voltage scaling in addition of frequency " + "scaling. You should use that instead of " + "p4-clockmod, if possible.\n"); switch (c->x86_model) { case 0x0E: /* Core */ case 0x0F: /* Core Duo */ case 0x16: /* Celeron Core */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); + return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); case 0x0D: /* Pentium M (Dothan) */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; /* fall through */ case 0x09: /* Pentium M (Banias) */ - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + return speedstep_get_frequency(SPEEDSTEP_CPU_PM); } } if (c->x86 != 0xF) { if (!cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " - "Please send an e-mail to \n"); + printk(KERN_WARNING PFX "Unknown CPU. " + "Please send an e-mail to " + "\n"); return 0; } @@ -182,16 +189,16 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) * throttling is active or not. */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { + if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) { printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " "The speedstep-ich or acpi cpufreq modules offer " "voltage scaling in addition of frequency scaling. " "You should use either one instead of p4-clockmod, " "if possible.\n"); - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); + return speedstep_get_frequency(SPEEDSTEP_CPU_P4M); } - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); + return speedstep_get_frequency(SPEEDSTEP_CPU_P4D); } @@ -223,8 +230,8 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) return -EINVAL; /* table init */ - for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if ((i<2) && (has_N44_O17_errata[policy->cpu])) + for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if ((i < 2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; @@ -258,12 +265,12 @@ static unsigned int cpufreq_p4_get(unsigned int cpu) l = DC_DISABLE; if (l != DC_DISABLE) - return (stock_freq * l / 8); + return stock_freq * l / 8; return stock_freq; } -static struct freq_attr* p4clockmod_attr[] = { +static struct freq_attr *p4clockmod_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -299,9 +306,10 @@ static int __init cpufreq_p4_init(void) ret = cpufreq_register_driver(&p4clockmod_driver); if (!ret) - printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); + printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock " + "Modulation available\n"); - return (ret); + return ret; } @@ -311,9 +319,9 @@ static void __exit cpufreq_p4_exit(void) } -MODULE_AUTHOR ("Zwane Mwaikambo "); -MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Zwane Mwaikambo "); +MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); +MODULE_LICENSE("GPL"); late_initcall(cpufreq_p4_init); module_exit(cpufreq_p4_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index dedc1e98f168..8bbb11adb315 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor = 0; +static unsigned int speedstep_processor; static u32 pmbase; @@ -54,7 +54,8 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { }; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "speedstep-ich", msg) /** @@ -62,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { * * Returns: -ENODEV if no register could be found */ -static int speedstep_find_register (void) +static int speedstep_find_register(void) { if (!speedstep_chipset_dev) return -ENODEV; @@ -90,7 +91,7 @@ static int speedstep_find_register (void) * * Tries to change the SpeedStep state. */ -static void speedstep_set_state (unsigned int state) +static void speedstep_set_state(unsigned int state) { u8 pm2_blk; u8 value; @@ -133,11 +134,11 @@ static void speedstep_set_state (unsigned int state) dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - if (state == (value & 0x1)) { - dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); - } else { - printk (KERN_ERR "cpufreq: change failed - I/O error\n"); - } + if (state == (value & 0x1)) + dprintk("change to %u MHz succeeded\n", + speedstep_get_frequency(speedstep_processor) / 1000); + else + printk(KERN_ERR "cpufreq: change failed - I/O error\n"); return; } @@ -149,7 +150,7 @@ static void speedstep_set_state (unsigned int state) * Tries to activate the SpeedStep status and control registers. * Returns -EINVAL on an unsupported chipset, and zero on success. */ -static int speedstep_activate (void) +static int speedstep_activate(void) { u16 value = 0; @@ -175,20 +176,18 @@ static int speedstep_activate (void) * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected * chipset, or zero on failure. */ -static unsigned int speedstep_detect_chipset (void) +static unsigned int speedstep_detect_chipset(void) { speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (speedstep_chipset_dev) return 4; /* 4-M */ speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (speedstep_chipset_dev) return 3; /* 3-M */ @@ -196,8 +195,7 @@ static unsigned int speedstep_detect_chipset (void) speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_10, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (speedstep_chipset_dev) { /* speedstep.c causes lockups on Dell Inspirons 8000 and @@ -208,8 +206,7 @@ static unsigned int speedstep_detect_chipset (void) hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_MC, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (!hostbridge) @@ -236,7 +233,7 @@ static unsigned int _speedstep_get(const struct cpumask *cpus) cpus_allowed = current->cpus_allowed; set_cpus_allowed_ptr(current, cpus); - speed = speedstep_get_processor_frequency(speedstep_processor); + speed = speedstep_get_frequency(speedstep_processor); set_cpus_allowed_ptr(current, &cpus_allowed); dprintk("detected %u kHz as current frequency\n", speed); return speed; @@ -251,11 +248,12 @@ static unsigned int speedstep_get(unsigned int cpu) * speedstep_target - set a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * Sets a new CPUFreq policy. */ -static int speedstep_target (struct cpufreq_policy *policy, +static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -264,7 +262,8 @@ static int speedstep_target (struct cpufreq_policy *policy, cpumask_t cpus_allowed; int i; - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], + target_freq, relation, &newstate)) return -EINVAL; freqs.old = _speedstep_get(policy->cpus); @@ -308,7 +307,7 @@ static int speedstep_target (struct cpufreq_policy *policy, * Limit must be within speedstep_low_freq and speedstep_high_freq, with * at least one border included. */ -static int speedstep_verify (struct cpufreq_policy *policy) +static int speedstep_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } @@ -344,7 +343,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) return -EIO; dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) + ? "low" : "high", (speed / 1000)); /* cpuinfo and default policy values */ @@ -352,9 +352,9 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); if (result) - return (result); + return result; - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); return 0; } @@ -366,7 +366,7 @@ static int speedstep_cpu_exit(struct cpufreq_policy *policy) return 0; } -static struct freq_attr* speedstep_attr[] = { +static struct freq_attr *speedstep_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -396,13 +396,15 @@ static int __init speedstep_init(void) /* detect processor */ speedstep_processor = speedstep_detect_processor(); if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); + dprintk("Intel(R) SpeedStep(TM) capable processor " + "not found\n"); return -ENODEV; } /* detect chipset */ if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); + dprintk("Intel(R) SpeedStep(TM) for this chipset not " + "(yet) available.\n"); return -ENODEV; } @@ -431,9 +433,11 @@ static void __exit speedstep_exit(void) } -MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dave Jones , " + "Dominik Brodowski "); +MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets " + "with ICH-M southbridges."); +MODULE_LICENSE("GPL"); module_init(speedstep_init); module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index cdac7d62369b..55c696daa05c 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -18,10 +18,13 @@ #include #include "speedstep-lib.h" -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "speedstep-lib", msg) + +#define PFX "speedstep-lib: " #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -static int relaxed_check = 0; +static int relaxed_check; #else #define relaxed_check 0 #endif @@ -30,14 +33,14 @@ static int relaxed_check = 0; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency (unsigned int processor) +static unsigned int pentium3_get_frequency(unsigned int processor) { - /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ + /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { unsigned int ratio; /* Frequency Multiplier (x10) */ u8 bitmap; /* power on configuration bits [27, 25:22] (in MSR 0x2a) */ - } msr_decode_mult [] = { + } msr_decode_mult[] = { { 30, 0x01 }, { 35, 0x05 }, { 40, 0x02 }, @@ -52,7 +55,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) { 85, 0x26 }, { 90, 0x20 }, { 100, 0x2b }, - { 0, 0xff } /* error or unknown value */ + { 0, 0xff } /* error or unknown value */ }; /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ @@ -60,7 +63,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) unsigned int value; /* Front Side Bus speed in MHz */ u8 bitmap; /* power on configuration bits [18: 19] (in MSR 0x2a) */ - } msr_decode_fsb [] = { + } msr_decode_fsb[] = { { 66, 0x0 }, { 100, 0x2 }, { 133, 0x1 }, @@ -85,7 +88,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) } /* decode the multiplier */ - if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { + if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { dprintk("workaround for early PIIIs\n"); msr_lo &= 0x03c00000; } else @@ -97,9 +100,10 @@ static unsigned int pentium3_get_frequency (unsigned int processor) j++; } - dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); + dprintk("speed is %u\n", + (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); - return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); + return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; } @@ -112,20 +116,23 @@ static unsigned int pentiumM_get_frequency(void) /* see table B-2 of 24547212.pdf */ if (msr_lo & 0x00040000) { - printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); + printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n", + msr_lo, msr_tmp); return 0; } msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); + dprintk("bits 22-26 are 0x%x, speed is %u\n", + msr_tmp, (msr_tmp * 100 * 1000)); - return (msr_tmp * 100 * 1000); + return msr_tmp * 100 * 1000; } static unsigned int pentium_core_get_frequency(void) { u32 fsb = 0; u32 msr_lo, msr_tmp; + int ret; rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); /* see table B-2 of 25366920.pdf */ @@ -153,12 +160,15 @@ static unsigned int pentium_core_get_frequency(void) } rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", + msr_lo, msr_tmp); msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); + dprintk("bits 22-26 are 0x%x, speed is %u\n", + msr_tmp, (msr_tmp * fsb)); - return (msr_tmp * fsb); + ret = (msr_tmp * fsb); + return ret; } @@ -167,6 +177,7 @@ static unsigned int pentium4_get_frequency(void) struct cpuinfo_x86 *c = &boot_cpu_data; u32 msr_lo, msr_hi, mult; unsigned int fsb = 0; + unsigned int ret; rdmsr(0x2c, msr_lo, msr_hi); @@ -195,44 +206,47 @@ static unsigned int pentium4_get_frequency(void) } if (!fsb) - printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to \n"); + printk(KERN_DEBUG PFX "couldn't detect FSB speed. " + "Please send an e-mail to \n"); /* Multiplier. */ mult = msr_lo >> 24; - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); + dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", + fsb, mult, (fsb * mult)); - return (fsb * mult); + ret = (fsb * mult); + return ret; } -unsigned int speedstep_get_processor_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(unsigned int processor) { switch (processor) { - case SPEEDSTEP_PROCESSOR_PCORE: + case SPEEDSTEP_CPU_PCORE: return pentium_core_get_frequency(); - case SPEEDSTEP_PROCESSOR_PM: + case SPEEDSTEP_CPU_PM: return pentiumM_get_frequency(); - case SPEEDSTEP_PROCESSOR_P4D: - case SPEEDSTEP_PROCESSOR_P4M: + case SPEEDSTEP_CPU_P4D: + case SPEEDSTEP_CPU_P4M: return pentium4_get_frequency(); - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + case SPEEDSTEP_CPU_PIII_T: + case SPEEDSTEP_CPU_PIII_C: + case SPEEDSTEP_CPU_PIII_C_EARLY: return pentium3_get_frequency(processor); default: return 0; }; return 0; } -EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); +EXPORT_SYMBOL_GPL(speedstep_get_frequency); /********************************************************************* * DETECT SPEEDSTEP-CAPABLE PROCESSOR * *********************************************************************/ -unsigned int speedstep_detect_processor (void) +unsigned int speedstep_detect_processor(void) { struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; @@ -261,7 +275,7 @@ unsigned int speedstep_detect_processor (void) * sample has ebx = 0x0f, production has 0x0e. */ if ((ebx == 0x0e) || (ebx == 0x0f)) - return SPEEDSTEP_PROCESSOR_P4M; + return SPEEDSTEP_CPU_P4M; break; case 7: /* @@ -272,7 +286,7 @@ unsigned int speedstep_detect_processor (void) * samples are only of B-stepping... */ if (ebx == 0x0e) - return SPEEDSTEP_PROCESSOR_P4M; + return SPEEDSTEP_CPU_P4M; break; case 9: /* @@ -288,10 +302,13 @@ unsigned int speedstep_detect_processor (void) * M-P4-Ms may have either ebx=0xe or 0xf [see above] * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] * also, M-P4M HTs have ebx=0x8, too - * For now, they are distinguished by the model_id string + * For now, they are distinguished by the model_id + * string */ - if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) - return SPEEDSTEP_PROCESSOR_P4M; + if ((ebx == 0x0e) || + (strstr(c->x86_model_id, + "Mobile Intel(R) Pentium(R) 4") != NULL)) + return SPEEDSTEP_CPU_P4M; break; default: break; @@ -301,7 +318,8 @@ unsigned int speedstep_detect_processor (void) switch (c->x86_model) { case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, + * 0x06 for mobile PIII-M */ ebx = cpuid_ebx(0x00000001); dprintk("ebx is %x\n", ebx); @@ -313,14 +331,15 @@ unsigned int speedstep_detect_processor (void) /* So far all PIII-M processors support SpeedStep. See * Intel's 24540640.pdf of June 2003 */ - return SPEEDSTEP_PROCESSOR_PIII_T; + return SPEEDSTEP_CPU_PIII_T; case 0x08: /* Intel PIII [Coppermine] */ /* all mobile PIII Coppermines have FSB 100 MHz * ==> sort out a few desktop PIIIs. */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", + msr_lo, msr_hi); msr_lo &= 0x00c0000; if (msr_lo != 0x0080000) return 0; @@ -332,13 +351,15 @@ unsigned int speedstep_detect_processor (void) * bit 56 or 57 is set */ rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); - if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { + dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", + msr_lo, msr_hi); + if ((msr_hi & (1<<18)) && + (relaxed_check ? 1 : (msr_hi & (3<<24)))) { if (c->x86_mask == 0x01) { dprintk("early PIII version\n"); - return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; + return SPEEDSTEP_CPU_PIII_C_EARLY; } else - return SPEEDSTEP_PROCESSOR_PIII_C; + return SPEEDSTEP_CPU_PIII_C; } default: @@ -369,7 +390,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, dprintk("trying to determine both speeds\n"); /* get current speed */ - prev_speed = speedstep_get_processor_frequency(processor); + prev_speed = speedstep_get_frequency(processor); if (!prev_speed) return -EIO; @@ -379,7 +400,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, /* switch to low state */ set_state(SPEEDSTEP_LOW); - *low_speed = speedstep_get_processor_frequency(processor); + *low_speed = speedstep_get_frequency(processor); if (!*low_speed) { ret = -EIO; goto out; @@ -398,7 +419,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (transition_latency) do_gettimeofday(&tv2); - *high_speed = speedstep_get_processor_frequency(processor); + *high_speed = speedstep_get_frequency(processor); if (!*high_speed) { ret = -EIO; goto out; @@ -426,9 +447,12 @@ unsigned int speedstep_get_freqs(unsigned int processor, /* check if the latency measurement is too high or too low * and set it to a safe value (500uSec) in that case */ - if (*transition_latency > 10000000 || *transition_latency < 50000) { - printk (KERN_WARNING "speedstep: frequency transition measured seems out of " - "range (%u nSec), falling back to a safe one of %u nSec.\n", + if (*transition_latency > 10000000 || + *transition_latency < 50000) { + printk(KERN_WARNING PFX "frequency transition " + "measured seems out of range (%u " + "nSec), falling back to a safe one of" + "%u nSec.\n", *transition_latency, 500000); *transition_latency = 500000; } @@ -436,15 +460,16 @@ unsigned int speedstep_get_freqs(unsigned int processor, out: local_irq_restore(flags); - return (ret); + return ret; } EXPORT_SYMBOL_GPL(speedstep_get_freqs); #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK module_param(relaxed_check, int, 0444); -MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); +MODULE_PARM_DESC(relaxed_check, + "Don't do all checks for speedstep capability."); #endif -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index b11bcc608cac..2b6c04e5a304 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -12,17 +12,17 @@ /* processors */ -#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ +#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ +#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ +#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using - * the speedstep_get_processor_frequency() call. */ -#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ + * the speedstep_get_frequency() call. */ +#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ +#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ +#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ /* speedstep states -- only two of them */ @@ -34,7 +34,7 @@ extern unsigned int speedstep_detect_processor (void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_processor_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(unsigned int processor); /* detect the low and high speeds of the processor. The callback diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 8a85c93bd62a..befea088e4f5 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -19,8 +19,8 @@ #include #include #include +#include #include -#include #include "speedstep-lib.h" @@ -30,12 +30,12 @@ * If user gives it, these are used. * */ -static int smi_port = 0; -static int smi_cmd = 0; -static unsigned int smi_sig = 0; +static int smi_port; +static int smi_cmd; +static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor = 0; +static unsigned int speedstep_processor; /* * There are only two frequency states for each processor. Values @@ -56,12 +56,13 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { * of DMA activity going on? */ #define SMI_TRIES 5 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "speedstep-smi", msg) /** * speedstep_smi_ownership */ -static int speedstep_smi_ownership (void) +static int speedstep_smi_ownership(void) { u32 command, result, magic, dummy; u32 function = GET_SPEEDSTEP_OWNER; @@ -70,16 +71,18 @@ static int speedstep_smi_ownership (void) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); magic = virt_to_phys(magic_data); - dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); + dprintk("trying to obtain ownership with command %x at port %x\n", + command, smi_port); __asm__ __volatile__( "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp\n" - : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), - "=S" (dummy) + : "=D" (result), + "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), + "=S" (dummy) : "a" (command), "b" (function), "c" (0), "d" (smi_port), - "D" (0), "S" (magic) + "D" (0), "S" (magic) : "memory" ); @@ -97,10 +100,10 @@ static int speedstep_smi_ownership (void) * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing * shows that the latter occurs if !(ist_info.event & 0xFFFF). */ -static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) +static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) { u32 command, result = 0, edi, high_mhz, low_mhz, dummy; - u32 state=0; + u32 state = 0; u32 function = GET_SPEEDSTEP_FREQS; if (!(ist_info.event & 0xFFFF)) { @@ -110,17 +113,25 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); + dprintk("trying to determine frequencies with command %x at port %x\n", + command, smi_port); __asm__ __volatile__( "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp" - : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) + : "=a" (result), + "=b" (high_mhz), + "=c" (low_mhz), + "=d" (state), "=D" (edi), "=S" (dummy) + : "a" (command), + "b" (function), + "c" (state), + "d" (smi_port), "S" (0), "D" (0) ); - dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); + dprintk("result %x, low_freq %u, high_freq %u\n", + result, low_mhz, high_mhz); /* abort if results are obviously incorrect... */ if ((high_mhz + low_mhz) < 600) @@ -137,26 +148,30 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * */ -static int speedstep_get_state (void) +static int speedstep_get_state(void) { - u32 function=GET_SPEEDSTEP_STATE; + u32 function = GET_SPEEDSTEP_STATE; u32 result, state, edi, command, dummy; command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); + dprintk("trying to determine current setting with command %x " + "at port %x\n", command, smi_port); __asm__ __volatile__( "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp\n" - : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0) + : "=a" (result), + "=b" (state), "=D" (edi), + "=c" (dummy), "=d" (dummy), "=S" (dummy) + : "a" (command), "b" (function), "c" (0), + "d" (smi_port), "S" (0), "D" (0) ); dprintk("state is %x, result is %x\n", state, result); - return (state & 1); + return state & 1; } @@ -165,11 +180,11 @@ static int speedstep_get_state (void) * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * */ -static void speedstep_set_state (unsigned int state) +static void speedstep_set_state(unsigned int state) { unsigned int result = 0, command, new_state, dummy; unsigned long flags; - unsigned int function=SET_SPEEDSTEP_STATE; + unsigned int function = SET_SPEEDSTEP_STATE; unsigned int retry = 0; if (state > 0x1) @@ -180,11 +195,14 @@ static void speedstep_set_state (unsigned int state) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); + dprintk("trying to set frequency to state %u " + "with command %x at port %x\n", + state, command, smi_port); do { if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", retry, result); + dprintk("retry %u, previous result %u, waiting...\n", + retry, result); mdelay(retry * 50); } retry++; @@ -192,20 +210,26 @@ static void speedstep_set_state (unsigned int state) "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp" - : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy), - "=d" (dummy), "=S" (dummy) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) + : "=b" (new_state), "=D" (result), + "=c" (dummy), "=a" (dummy), + "=d" (dummy), "=S" (dummy) + : "a" (command), "b" (function), "c" (state), + "d" (smi_port), "S" (0), "D" (0) ); } while ((new_state != state) && (retry <= SMI_TRIES)); /* enable IRQs */ local_irq_restore(flags); - if (new_state == state) { - dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); - } else { - printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result); - } + if (new_state == state) + dprintk("change to %u MHz succeeded after %u tries " + "with result %u\n", + (speedstep_freqs[new_state].frequency / 1000), + retry, result); + else + printk(KERN_ERR "cpufreq: change to state %u " + "failed with new_state %u and result %u\n", + state, new_state, result); return; } @@ -219,13 +243,14 @@ static void speedstep_set_state (unsigned int state) * * Sets a new CPUFreq policy/freq. */ -static int speedstep_target (struct cpufreq_policy *policy, +static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; struct cpufreq_freqs freqs; - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], + target_freq, relation, &newstate)) return -EINVAL; freqs.old = speedstep_freqs[speedstep_get_state()].frequency; @@ -250,7 +275,7 @@ static int speedstep_target (struct cpufreq_policy *policy, * Limit must be within speedstep_low_freq and speedstep_high_freq, with * at least one border included. */ -static int speedstep_verify (struct cpufreq_policy *policy) +static int speedstep_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } @@ -259,7 +284,8 @@ static int speedstep_verify (struct cpufreq_policy *policy) static int speedstep_cpu_init(struct cpufreq_policy *policy) { int result; - unsigned int speed,state; + unsigned int speed, state; + unsigned int *low, *high; /* capability check */ if (policy->cpu != 0) @@ -272,19 +298,23 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) } /* detect low and high frequency */ - result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency); + low = &speedstep_freqs[SPEEDSTEP_LOW].frequency; + high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency; + + result = speedstep_smi_get_freqs(low, high); if (result) { - /* fall back to speedstep_lib.c dection mechanism: try both states out */ - dprintk("could not detect low and high frequencies by SMI call.\n"); + /* fall back to speedstep_lib.c dection mechanism: + * try both states out */ + dprintk("could not detect low and high frequencies " + "by SMI call.\n"); result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + low, high, NULL, &speedstep_set_state); if (result) { - dprintk("could not detect two different speeds -- aborting.\n"); + dprintk("could not detect two different speeds" + " -- aborting.\n"); return result; } else dprintk("workaround worked.\n"); @@ -295,7 +325,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) speed = speedstep_freqs[state].frequency; dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) + ? "low" : "high", (speed / 1000)); /* cpuinfo and default policy values */ @@ -304,7 +335,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); @@ -321,7 +352,7 @@ static unsigned int speedstep_get(unsigned int cpu) { if (cpu) return -ENODEV; - return speedstep_get_processor_frequency(speedstep_processor); + return speedstep_get_frequency(speedstep_processor); } @@ -335,7 +366,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) return result; } -static struct freq_attr* speedstep_attr[] = { +static struct freq_attr *speedstep_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -364,21 +395,23 @@ static int __init speedstep_init(void) speedstep_processor = speedstep_detect_processor(); switch (speedstep_processor) { - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + case SPEEDSTEP_CPU_PIII_T: + case SPEEDSTEP_CPU_PIII_C: + case SPEEDSTEP_CPU_PIII_C_EARLY: break; default: speedstep_processor = 0; } if (!speedstep_processor) { - dprintk ("No supported Intel CPU detected.\n"); + dprintk("No supported Intel CPU detected.\n"); return -ENODEV; } - dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", - ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); + dprintk("signature:0x%.8lx, command:0x%.8lx, " + "event:0x%.8lx, perf_level:0x%.8lx.\n", + ist_info.signature, ist_info.command, + ist_info.event, ist_info.perf_level); /* Error if no IST-SMI BIOS or no PARM sig= 'ISGE' aka 'Intel Speedstep Gate E' */ @@ -416,17 +449,20 @@ static void __exit speedstep_exit(void) cpufreq_unregister_driver(&speedstep_driver); } -module_param(smi_port, int, 0444); -module_param(smi_cmd, int, 0444); -module_param(smi_sig, uint, 0444); +module_param(smi_port, int, 0444); +module_param(smi_cmd, int, 0444); +module_param(smi_sig, uint, 0444); -MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); -MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); -MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); +MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value " + "-- Intel's default setting is 0xb2"); +MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value " + "-- Intel's default setting is 0x82"); +MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the " + "SMI interface."); -MODULE_AUTHOR ("Hiroshi Miura"); -MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Hiroshi Miura"); +MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface."); +MODULE_LICENSE("GPL"); module_init(speedstep_init); module_exit(speedstep_exit); -- cgit v1.2.3 From b9e7638a301b1245d4675087a05fa90fb4fa1845 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 18 Jan 2009 00:32:26 -0500 Subject: [CPUFREQ] checkpatch cleanups for powernow-k7 The asm/timer.h warning can be ignored, it's needed for recalibrate_cpu_khz() Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 237 +++++++++++++++++------------- 1 file changed, 137 insertions(+), 100 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9cd73d157437..3c28ccd49742 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -6,10 +6,12 @@ * Licensed under the terms of the GNU GPL License version 2. * Based upon datasheets & sample CPUs kindly provided by AMD. * - * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. - * - We cli/sti on stepping A0 CPUs around the FID/VID transition. - * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. - * - We disable half multipliers if ACPI is used on A0 stepping CPUs. + * Errata 5: + * CPU may fail to execute a FID/VID change in presence of interrupt. + * - We cli/sti on stepping A0 CPUs around the FID/VID transition. + * Errata 15: + * CPU with half frequency multipliers may hang upon wakeup from disconnect. + * - We disable half multipliers if ACPI is used on A0 stepping CPUs. */ #include @@ -20,11 +22,11 @@ #include #include #include +#include +#include +#include /* Needed for recalibrate_cpu_khz() */ #include -#include -#include -#include #include #ifdef CONFIG_X86_POWERNOW_K7_ACPI @@ -58,9 +60,9 @@ struct pst_s { union powernow_acpi_control_t { struct { unsigned long fid:5, - vid:5, - sgtc:20, - res1:2; + vid:5, + sgtc:20, + res1:2; } bits; unsigned long val; }; @@ -101,7 +103,8 @@ static unsigned int fsb; static unsigned int latency; static char have_a0; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "powernow-k7", msg) static int check_fsb(unsigned int fsbspeed) { @@ -109,7 +112,7 @@ static int check_fsb(unsigned int fsbspeed) unsigned int f = fsb / 1000; delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; - return (delta < 5); + return delta < 5; } static int check_powernow(void) @@ -117,24 +120,26 @@ static int check_powernow(void) struct cpuinfo_x86 *c = &cpu_data(0); unsigned int maxei, eax, ebx, ecx, edx; - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { #ifdef MODULE - printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); + printk(KERN_INFO PFX "This module only works with " + "AMD K7 CPUs\n"); #endif return 0; } /* Get maximum capabilities */ - maxei = cpuid_eax (0x80000000); + maxei = cpuid_eax(0x80000000); if (maxei < 0x80000007) { /* Any powernow info ? */ #ifdef MODULE - printk (KERN_INFO PFX "No powernow capabilities detected\n"); + printk(KERN_INFO PFX "No powernow capabilities detected\n"); #endif return 0; } if ((c->x86_model == 6) && (c->x86_mask == 0)) { - printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); + printk(KERN_INFO PFX "K7 660[A0] core detected, " + "enabling errata workarounds\n"); have_a0 = 1; } @@ -144,37 +149,42 @@ static int check_powernow(void) if (!(edx & (1 << 1 | 1 << 2))) return 0; - printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); + printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); if (edx & 1 << 1) { - printk ("frequency"); - can_scale_bus=1; + printk("frequency"); + can_scale_bus = 1; } if ((edx & (1 << 1 | 1 << 2)) == 0x6) - printk (" and "); + printk(" and "); if (edx & 1 << 2) { - printk ("voltage"); - can_scale_vid=1; + printk("voltage"); + can_scale_vid = 1; } - printk (".\n"); + printk(".\n"); return 1; } +static void invalidate_entry(unsigned int entry) +{ + powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; +} -static int get_ranges (unsigned char *pst) +static int get_ranges(unsigned char *pst) { unsigned int j; unsigned int speed; u8 fid, vid; - powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * + (number_scales + 1)), GFP_KERNEL); if (!powernow_table) return -ENOMEM; - for (j=0 ; j < number_scales; j++) { + for (j = 0 ; j < number_scales; j++) { fid = *pst++; powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; @@ -182,10 +192,10 @@ static int get_ranges (unsigned char *pst) speed = powernow_table[j].frequency; - if ((fid_codes[fid] % 10)==5) { + if ((fid_codes[fid] % 10) == 5) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (have_a0 == 1) - powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(j); #endif } @@ -197,7 +207,7 @@ static int get_ranges (unsigned char *pst) vid = *pst++; powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, @@ -214,13 +224,13 @@ static void change_FID(int fid) { union msr_fidvidctl fidvidctl; - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); if (fidvidctl.bits.FID != fid) { fidvidctl.bits.SGTC = latency; fidvidctl.bits.FID = fid; fidvidctl.bits.VIDC = 0; fidvidctl.bits.FIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); } } @@ -229,18 +239,18 @@ static void change_VID(int vid) { union msr_fidvidctl fidvidctl; - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); if (fidvidctl.bits.VID != vid) { fidvidctl.bits.SGTC = latency; fidvidctl.bits.VID = vid; fidvidctl.bits.FIDC = 0; fidvidctl.bits.VIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); } } -static void change_speed (unsigned int index) +static void change_speed(unsigned int index) { u8 fid, vid; struct cpufreq_freqs freqs; @@ -257,7 +267,7 @@ static void change_speed (unsigned int index) freqs.cpu = 0; - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); cfid = fidvidstatus.bits.CFID; freqs.old = fsb * fid_codes[cfid] / 10; @@ -321,12 +331,14 @@ static int powernow_acpi_init(void) goto err1; } - if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (acpi_processor_perf->control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) { retval = -ENODEV; goto err2; } - if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (acpi_processor_perf->status_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) { retval = -ENODEV; goto err2; } @@ -338,7 +350,8 @@ static int powernow_acpi_init(void) goto err2; } - powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * + (number_scales + 1)), GFP_KERNEL); if (!powernow_table) { retval = -ENOMEM; goto err2; @@ -352,7 +365,7 @@ static int powernow_acpi_init(void) unsigned int speed, speed_mhz; pc.val = (unsigned long) state->control; - dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", i, (u32) state->core_frequency, (u32) state->power, @@ -381,12 +394,12 @@ static int powernow_acpi_init(void) if (speed % 1000 > 0) speed_mhz++; - if ((fid_codes[fid] % 10)==5) { + if ((fid_codes[fid] % 10) == 5) { if (have_a0 == 1) - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(i); } - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed_mhz, vid, mobile_vid_table[vid]/1000, @@ -422,7 +435,8 @@ err1: err05: kfree(acpi_processor_perf); err0: - printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); + printk(KERN_WARNING PFX "ACPI perflib can not be used on " + "this platform\n"); acpi_processor_perf = NULL; return retval; } @@ -435,7 +449,14 @@ static int powernow_acpi_init(void) } #endif -static int powernow_decode_bios (int maxfid, int startvid) +static void print_pst_entry(struct pst_s *pst, unsigned int j) +{ + dprintk("PST:%d (@%p)\n", j, pst); + dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); +} + +static int powernow_decode_bios(int maxfid, int startvid) { struct psb_s *psb; struct pst_s *pst; @@ -446,61 +467,67 @@ static int powernow_decode_bios (int maxfid, int startvid) etuple = cpuid_eax(0x80000001); - for (i=0xC0000; i < 0xffff0 ; i+=16) { + for (i = 0xC0000; i < 0xffff0 ; i += 16) { p = phys_to_virt(i); - if (memcmp(p, "AMDK7PNOW!", 10) == 0){ - dprintk ("Found PSB header at %p\n", p); + if (memcmp(p, "AMDK7PNOW!", 10) == 0) { + dprintk("Found PSB header at %p\n", p); psb = (struct psb_s *) p; - dprintk ("Table version: 0x%x\n", psb->tableversion); + dprintk("Table version: 0x%x\n", psb->tableversion); if (psb->tableversion != 0x12) { - printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); + printk(KERN_INFO PFX "Sorry, only v1.2 tables" + " supported right now\n"); return -ENODEV; } - dprintk ("Flags: 0x%x\n", psb->flags); - if ((psb->flags & 1)==0) { - dprintk ("Mobile voltage regulator\n"); - } else { - dprintk ("Desktop voltage regulator\n"); - } + dprintk("Flags: 0x%x\n", psb->flags); + if ((psb->flags & 1) == 0) + dprintk("Mobile voltage regulator\n"); + else + dprintk("Desktop voltage regulator\n"); latency = psb->settlingtime; if (latency < 100) { - printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. " - "Should be at least 100. Correcting.\n", latency); + printk(KERN_INFO PFX "BIOS set settling time " + "to %d microseconds. " + "Should be at least 100. " + "Correcting.\n", latency); latency = 100; } - dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); - dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); + dprintk("Settling Time: %d microseconds.\n", + psb->settlingtime); + dprintk("Has %d PST tables. (Only dumping ones " + "relevant to this CPU).\n", + psb->numpst); - p += sizeof (struct psb_s); + p += sizeof(struct psb_s); pst = (struct pst_s *) p; - for (j=0; jnumpst; j++) { + for (j = 0; j < psb->numpst; j++) { pst = (struct pst_s *) p; number_scales = pst->numpstates; - if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && - (maxfid==pst->maxfid) && (startvid==pst->startvid)) - { - dprintk ("PST:%d (@%p)\n", j, pst); - dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", - pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); - - ret = get_ranges ((char *) pst + sizeof (struct pst_s)); + if ((etuple == pst->cpuid) && + check_fsb(pst->fsbspeed) && + (maxfid == pst->maxfid) && + (startvid == pst->startvid)) { + print_pst_entry(pst, j); + p = (char *)pst + sizeof(struct pst_s); + ret = get_ranges(p); return ret; } else { unsigned int k; - p = (char *) pst + sizeof (struct pst_s); - for (k=0; kident); - printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); - printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); + printk(KERN_WARNING PFX + "%s laptop with broken PST tables in BIOS detected.\n", + d->ident); + printk(KERN_WARNING PFX + "You need to downgrade to 3A21 (09/09/2002), or try a newer " + "BIOS than 3A71 (01/20/2003)\n"); + printk(KERN_WARNING PFX + "cpufreq scaling has been disabled as a result of this.\n"); return 0; } @@ -598,7 +631,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { { } }; -static int __init powernow_cpu_init (struct cpufreq_policy *policy) +static int __init powernow_cpu_init(struct cpufreq_policy *policy) { union msr_fidvidstatus fidvidstatus; int result; @@ -606,7 +639,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) if (policy->cpu != 0) return -ENODEV; - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); recalibrate_cpu_khz(); @@ -618,19 +651,21 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) dprintk("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { - printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); + printk(KERN_INFO PFX "PSB/PST known to be broken. " + "Trying ACPI instead\n"); result = powernow_acpi_init(); } else { - result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + result = powernow_decode_bios(fidvidstatus.bits.MFID, + fidvidstatus.bits.SVID); if (result) { - printk (KERN_INFO PFX "Trying ACPI perflib\n"); + printk(KERN_INFO PFX "Trying ACPI perflib\n"); maximum_speed = 0; minimum_speed = -1; latency = 0; result = powernow_acpi_init(); if (result) { - printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); - printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); + printk(KERN_INFO PFX + "ACPI and legacy methods failed\n"); } } else { /* SGTC use the bus clock as timer */ @@ -642,10 +677,11 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) if (result) return result; - printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", + printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", minimum_speed/1000, maximum_speed/1000); - policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); + policy->cpuinfo.transition_latency = + cpufreq_scale(2000000UL, fsb, latency); policy->cur = powernow_get(0); @@ -654,7 +690,8 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) return cpufreq_frequency_table_cpuinfo(policy, powernow_table); } -static int powernow_cpu_exit (struct cpufreq_policy *policy) { +static int powernow_cpu_exit(struct cpufreq_policy *policy) +{ cpufreq_frequency_table_put_attr(policy->cpu); #ifdef CONFIG_X86_POWERNOW_K7_ACPI @@ -669,7 +706,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { return 0; } -static struct freq_attr* powernow_table_attr[] = { +static struct freq_attr *powernow_table_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -685,15 +722,15 @@ static struct cpufreq_driver powernow_driver = { .attr = powernow_table_attr, }; -static int __init powernow_init (void) +static int __init powernow_init(void) { - if (check_powernow()==0) + if (check_powernow() == 0) return -ENODEV; return cpufreq_register_driver(&powernow_driver); } -static void __exit powernow_exit (void) +static void __exit powernow_exit(void) { cpufreq_unregister_driver(&powernow_driver); } @@ -701,9 +738,9 @@ static void __exit powernow_exit (void) module_param(acpi_force, int, 0444); MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dave Jones "); +MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); +MODULE_LICENSE("GPL"); late_initcall(powernow_init); module_exit(powernow_exit); -- cgit v1.2.3 From 0e64a0c982c06a6b8f5e2a7f29eb108fdf257b2f Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 4 Feb 2009 14:37:50 -0500 Subject: [CPUFREQ] checkpatch cleanups for powernow-k8 This driver has so many long function names, and deep nested if's The remaining warnings will need some code restructuring to clean up. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 351 +++++++++++++++++++----------- 1 file changed, 226 insertions(+), 125 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6428aa17b40e..4dd7e3bdee23 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -33,10 +33,10 @@ #include #include #include /* for current / set_cpus_allowed() */ +#include +#include #include -#include -#include #ifdef CONFIG_X86_POWERNOW_K8_ACPI #include @@ -71,7 +71,8 @@ static u32 find_khz_freq_from_fid(u32 fid) return 1000 * find_freq_from_fid(fid); } -static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, + u32 pstate) { return data[pstate].frequency; } @@ -186,7 +187,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) return 1; } - lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + lo = fid; + lo |= (data->currvid << MSR_C_LO_VID_SHIFT); + lo |= MSR_C_LO_INIT_FID_VID; dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); @@ -194,7 +197,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) do { wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); if (i++ > 100) { - printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); + printk(KERN_ERR PFX + "Hardware error - pending bit very stuck - " + "no further pstate changes possible\n"); return 1; } } while (query_current_values_with_pending_wait(data)); @@ -202,14 +207,16 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) count_off_irt(data); if (savevid != data->currvid) { - printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", - savevid, data->currvid); + printk(KERN_ERR PFX + "vid change on fid trans, old 0x%x, new 0x%x\n", + savevid, data->currvid); return 1; } if (fid != data->currfid) { - printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, - data->currfid); + printk(KERN_ERR PFX + "fid trans failed, fid 0x%x, curr 0x%x\n", fid, + data->currfid); return 1; } @@ -228,7 +235,9 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) return 1; } - lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + lo = data->currfid; + lo |= (vid << MSR_C_LO_VID_SHIFT); + lo |= MSR_C_LO_INIT_FID_VID; dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); @@ -236,20 +245,24 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) do { wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + printk(KERN_ERR PFX "internal error - pending bit " + "very stuck - no further pstate " + "changes possible\n"); return 1; } } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { - printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", + printk(KERN_ERR PFX "fid changed on vid trans, old " + "0x%x new 0x%x\n", savefid, data->currfid); return 1; } if (vid != data->currvid) { - printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, - data->currvid); + printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " + "curr 0x%x\n", + vid, data->currvid); return 1; } @@ -261,7 +274,8 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) * Decreasing vid codes represent increasing voltages: * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. */ -static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) +static int decrease_vid_code_by_step(struct powernow_k8_data *data, + u32 reqvid, u32 step) { if ((data->currvid - reqvid) > step) reqvid = data->currvid - step; @@ -283,7 +297,8 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate) } /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ -static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) +static int transition_fid_vid(struct powernow_k8_data *data, + u32 reqfid, u32 reqvid) { if (core_voltage_pre_transition(data, reqvid)) return 1; @@ -298,7 +313,8 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req return 1; if ((reqfid != data->currfid) || (reqvid != data->currvid)) { - printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", + printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " + "curr 0x%x 0x%x\n", smp_processor_id(), reqfid, reqvid, data->currfid, data->currvid); return 1; @@ -311,13 +327,15 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req } /* Phase 1 - core voltage transition ... setup voltage */ -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) +static int core_voltage_pre_transition(struct powernow_k8_data *data, + u32 reqvid) { u32 rvosteps = data->rvo; u32 savefid = data->currfid; u32 maxvid, lo; - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", + dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " + "reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); @@ -340,7 +358,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid } else { dprintk("ph1: changing vid for rvo, req 0x%x\n", data->currvid - 1); - if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) + if (decrease_vid_code_by_step(data, data->currvid-1, 1)) return 1; rvosteps--; } @@ -350,7 +368,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid return 1; if (savefid != data->currfid) { - printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); + printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", + data->currfid); return 1; } @@ -363,20 +382,24 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid /* Phase 2 - core frequency transition */ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) { - u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; + u32 vcoreqfid, vcocurrfid, vcofiddiff; + u32 fid_interval, savevid = data->currvid; - if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", - reqfid, data->currfid); + if ((reqfid < HI_FID_TABLE_BOTTOM) && + (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX "ph2: illegal lo-lo transition " + "0x%x 0x%x\n", reqfid, data->currfid); return 1; } if (data->currfid == reqfid) { - printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); + printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", + data->currfid); return 0; } - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", + dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " + "reqfid 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqfid); @@ -390,14 +413,14 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) if (reqfid > data->currfid) { if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + fid_interval)) { + if (write_new_fid(data, + data->currfid + fid_interval)) return 1; - } } else { if (write_new_fid - (data, 2 + convert_fid_to_vco_fid(data->currfid))) { + (data, + 2 + convert_fid_to_vco_fid(data->currfid))) return 1; - } } } else { if (write_new_fid(data, data->currfid - fid_interval)) @@ -417,7 +440,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) if (data->currfid != reqfid) { printk(KERN_ERR PFX - "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", + "ph2: mismatch, failed fid transition, " + "curr 0x%x, req 0x%x\n", data->currfid, reqfid); return 1; } @@ -435,7 +459,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) } /* Phase 3 - core voltage transition flow ... jump to the final vid. */ -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) +static int core_voltage_post_transition(struct powernow_k8_data *data, + u32 reqvid) { u32 savefid = data->currfid; u32 savereqvid = reqvid; @@ -457,7 +482,8 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi if (data->currvid != reqvid) { printk(KERN_ERR PFX - "ph3: failed vid transition\n, req 0x%x, curr 0x%x", + "ph3: failed vid transition\n, " + "req 0x%x, curr 0x%x", reqvid, data->currvid); return 1; } @@ -508,7 +534,8 @@ static int check_supported_cpu(unsigned int cpu) if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { - printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); + printk(KERN_INFO PFX + "Processor cpuid %x not supported\n", eax); goto out; } @@ -520,8 +547,10 @@ static int check_supported_cpu(unsigned int cpu) } cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - printk(KERN_INFO PFX "Power state transitions not supported\n"); + if ((edx & P_STATE_TRANSITION_CAPABLE) + != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX + "Power state transitions not supported\n"); goto out; } } else { /* must be a HW Pstate capable processor */ @@ -539,7 +568,8 @@ out: return rc; } -static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, + u8 maxvid) { unsigned int j; u8 lastfid = 0xff; @@ -550,12 +580,14 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 j, pst[j].vid); return -EINVAL; } - if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ + if (pst[j].vid < data->rvo) { + /* vid + rvo >= 0 */ printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" " %d\n", j); return -ENODEV; } - if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ + if (pst[j].vid < maxvid + data->rvo) { + /* vid + rvo >= maxvid */ printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" " %d\n", j); return -ENODEV; @@ -579,23 +611,31 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 return -EINVAL; } if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n"); + printk(KERN_INFO FW_BUG PFX + "first fid not from lo freq table\n"); return 0; } +static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) +{ + data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; +} + static void print_basics(struct powernow_k8_data *data) { int j; for (j = 0; j < data->numps; j++) { - if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { + if (data->powernow_table[j].frequency != + CPUFREQ_ENTRY_INVALID) { if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n", - j, + printk(KERN_INFO PFX + " %d : pstate %d (%d MHz)\n", j, data->powernow_table[j].index, data->powernow_table[j].frequency/1000); } else { - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + printk(KERN_INFO PFX + " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, data->powernow_table[j].index & 0xff, data->powernow_table[j].frequency/1000, @@ -604,20 +644,25 @@ static void print_basics(struct powernow_k8_data *data) } } if (data->batps) - printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); + printk(KERN_INFO PFX "Only %d pstates on battery\n", + data->batps); } -static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +static int fill_powernow_table(struct powernow_k8_data *data, + struct pst_s *pst, u8 maxvid) { struct cpufreq_frequency_table *powernow_table; unsigned int j; - if (data->batps) { /* use ACPI support to get full speed on mains power */ - printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); + if (data->batps) { + /* use ACPI support to get full speed on mains power */ + printk(KERN_WARNING PFX + "Only %d pstates usable (use ACPI driver for full " + "range\n", data->batps); data->numps = data->batps; } - for ( j=1; jnumps; j++ ) { + for (j = 1; j < data->numps; j++) { if (pst[j-1].fid >= pst[j].fid) { printk(KERN_ERR PFX "PST out of sequence\n"); return -EINVAL; @@ -640,9 +685,11 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, } for (j = 0; j < data->numps; j++) { + int freq; powernow_table[j].index = pst[j].fid; /* lower 8 bits */ powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ - powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); + freq = find_khz_freq_from_fid(pst[j].fid); + powernow_table[j].frequency = freq; } powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; powernow_table[data->numps].index = 0; @@ -658,7 +705,8 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, print_basics(data); for (j = 0; j < data->numps; j++) - if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) + if ((pst[j].fid == data->currfid) && + (pst[j].vid == data->currvid)) return 0; dprintk("currfid/vid do not match PST, ignoring\n"); @@ -698,7 +746,8 @@ static int find_psb_table(struct powernow_k8_data *data) } data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); + dprintk("voltage stabilization time: %d(*20us)\n", + data->vstable); dprintk("flags2: 0x%x\n", psb->flags2); data->rvo = psb->flags2 & 3; @@ -713,11 +762,12 @@ static int find_psb_table(struct powernow_k8_data *data) dprintk("numpst: 0x%x\n", psb->num_tables); cpst = psb->num_tables; - if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ + if ((psb->cpuid == 0x00000fc0) || + (psb->cpuid == 0x00000fe0)) { thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { + if ((thiscpuid == 0x00000fc0) || + (thiscpuid == 0x00000fe0)) cpst = 1; - } } if (cpst != 1) { printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); @@ -732,7 +782,8 @@ static int find_psb_table(struct powernow_k8_data *data) data->numps = psb->numps; dprintk("numpstates: 0x%x\n", data->numps); - return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); + return fill_powernow_table(data, + (struct pst_s *)(psb+1), maxvid); } /* * If you see this message, complain to BIOS manufacturer. If @@ -750,23 +801,27 @@ static int find_psb_table(struct powernow_k8_data *data) } #ifdef CONFIG_X86_POWERNOW_K8_ACPI -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, + unsigned int index) { + acpi_integer control; + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; - data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; -} + control = data->acpi_data.states[index].control; data->irt = (control + >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> + RVO_SHIFT) & RVO_MASK; data->exttype = (control + >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 + << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = + (control >> VST_SHIFT) & VST_MASK; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; int ret_val = -ENODEV; + acpi_integer space_id; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -779,11 +834,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + space_id = data->acpi_data.control_register.space_id; + if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", data->acpi_data.control_register.space_id, - data->acpi_data.status_register.space_id); + space_id); goto err_out; } @@ -802,7 +858,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if (ret_val) goto err_out_mem; - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].frequency = + CPUFREQ_TABLE_END; powernow_table[data->acpi_data.state_count].index = 0; data->powernow_table = powernow_table; @@ -830,13 +887,15 @@ err_out_mem: err_out: acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ + /* data->acpi_data.state_count informs us at ->exit() + * whether ACPI was used */ data->acpi_data.state_count = 0; return ret_val; } -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +static int fill_powernow_table_pstate(struct powernow_k8_data *data, + struct cpufreq_frequency_table *powernow_table) { int i; u32 hi = 0, lo = 0; @@ -848,84 +907,101 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf index = data->acpi_data.states[i].control & HW_PSTATE_MASK; if (index > data->max_hw_pstate) { - printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); - printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + printk(KERN_ERR PFX "invalid pstate %d - " + "bad value %d.\n", i, index); + printk(KERN_ERR PFX "Please report to BIOS " + "manufacturer\n"); + invalidate_entry(data, i); continue; } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { dprintk("invalid pstate %d, ignoring\n", index); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(data, i); continue; } powernow_table[i].index = index; - powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; + powernow_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; } return 0; } -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, + struct cpufreq_frequency_table *powernow_table) { int i; int cntlofreq = 0; + for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; u32 vid; + u32 freq, index; + acpi_integer status, control; if (data->exttype) { - fid = data->acpi_data.states[i].status & EXT_FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + status = data->acpi_data.states[i].status; + fid = status & EXT_FID_MASK; + vid = (status >> VID_SHIFT) & EXT_VID_MASK; } else { - fid = data->acpi_data.states[i].control & FID_MASK; - vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + control = data->acpi_data.states[i].control; + fid = control & FID_MASK; + vid = (control >> VID_SHIFT) & VID_MASK; } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - powernow_table[i].frequency = find_khz_freq_from_fid(fid); + index = fid | (vid<<8); + powernow_table[i].index = index; + + freq = find_khz_freq_from_fid(fid); + powernow_table[i].frequency = freq; /* verify frequency is OK */ - if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || - (powernow_table[i].frequency < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { + dprintk("invalid freq %u kHz, ignoring\n", freq); + invalidate_entry(data, i); continue; } - /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ + /* verify voltage is OK - + * BIOSs are using "off" to indicate invalid */ if (vid == VID_OFF) { dprintk("invalid vid %u, ignoring\n", vid); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(data, i); continue; } /* verify only 1 entry from the lo frequency table */ if (fid < HI_FID_TABLE_BOTTOM) { if (cntlofreq) { - /* if both entries are the same, ignore this one ... */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); + /* if both entries are the same, + * ignore this one ... */ + if ((freq != powernow_table[cntlofreq].frequency) || + (index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX + "Too many lo freq table " + "entries\n"); return 1; } - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + dprintk("double low frequency table entry, " + "ignoring it.\n"); + invalidate_entry(data, i); continue; } else cntlofreq = i; } - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries " + "%u kHz vs. %u kHz\n", freq, + (unsigned int) + (data->acpi_data.states[i].core_frequency + * 1000)); + invalidate_entry(data, i); continue; } } @@ -935,7 +1011,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + acpi_processor_unregister_performance(&data->acpi_data, + data->cpu); free_cpumask_var(data->acpi_data.shared_cpu_map); } @@ -954,14 +1031,25 @@ static int get_transition_latency(struct powernow_k8_data *data) } #else -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) +{ + return -ENODEV; +} +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) +{ + return; +} +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, + unsigned int index) +{ + return; +} static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ -static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) +static int transition_frequency_fidvid(struct powernow_k8_data *data, + unsigned int index) { u32 fid = 0; u32 vid = 0; @@ -989,7 +1077,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i return 0; } - if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + if ((fid < HI_FID_TABLE_BOTTOM) && + (data->currfid < HI_FID_TABLE_BOTTOM)) { printk(KERN_ERR PFX "ignoring illegal change in lo freq table-%x to 0x%x\n", data->currfid, fid); @@ -1017,7 +1106,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i } /* Take a frequency, and issue the hardware pstate transition command */ -static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) +static int transition_frequency_pstate(struct powernow_k8_data *data, + unsigned int index) { u32 pstate = 0; int res, i; @@ -1029,7 +1119,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i pstate = index & HW_PSTATE_MASK; if (pstate > data->max_hw_pstate) return 0; - freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + freqs.old = find_khz_freq_from_pstate(data->powernow_table, + data->currpstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask_nr(i, *(data->available_cores)) { @@ -1048,7 +1139,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i } /* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) +static int powernowk8_target(struct cpufreq_policy *pol, + unsigned targfreq, unsigned relation) { cpumask_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); @@ -1087,14 +1179,18 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - if ((checkvid != data->currvid) || (checkfid != data->currfid)) { + if ((checkvid != data->currvid) || + (checkfid != data->currfid)) { printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); + "error - out of sync, fix 0x%x 0x%x, " + "vid 0x%x 0x%x\n", + checkfid, data->currfid, + checkvid, data->currvid); } } - if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) + if (cpufreq_frequency_table_target(pol, data->powernow_table, + targfreq, relation, &newstate)) goto err_out; mutex_lock(&fidvid_mutex); @@ -1114,7 +1210,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi mutex_unlock(&fidvid_mutex); if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, + newstate); else pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1164,10 +1261,11 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) */ if (num_online_cpus() != 1) { #ifndef CONFIG_ACPI_PROCESSOR - printk(KERN_ERR PFX "ACPI Processor support is required " - "for SMP systems but is absent. Please load the " - "ACPI Processor module before starting this " - "driver.\n"); + printk(KERN_ERR PFX + "ACPI Processor support is required for " + "SMP systems but is absent. Please load the " + "ACPI Processor module before starting this " + "driver.\n"); #else printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" " ACPI _PSS objects in a way that Linux " @@ -1228,7 +1326,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->available_cores = pol->cpus; if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, + data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); @@ -1245,7 +1344,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); + dprintk("cpu_init done, current pstate 0x%x\n", + data->currpstate); else dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1262,7 +1362,7 @@ err_out: return -ENODEV; } -static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) +static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) { struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); @@ -1279,7 +1379,7 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) return 0; } -static unsigned int powernowk8_get (unsigned int cpu) +static unsigned int powernowk8_get(unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; @@ -1315,7 +1415,7 @@ out: return khz; } -static struct freq_attr* powernow_k8_attr[] = { +static struct freq_attr *powernow_k8_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -1360,7 +1460,8 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); +MODULE_AUTHOR("Paul Devriendt and " + "Mark Langsdorf "); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 57f4fa699195b761cbea90db5e38b4bc15610c7c Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 4 Feb 2009 01:17:45 +0100 Subject: [CPUFREQ] powernow-k8: Always compile powernow-k8 driver with ACPI support powernow-k8 driver should always try to get cpufreq info from ACPI. Otherwise it will not be able to detect the transition latency correctly which results in ondemand governor taking a wrong sampling rate which will then result in sever performance loss. Let the user not shoot himself in the foot and always compile in ACPI support for powernow-k8. This also fixes a wrong message if ACPI_PROCESSOR is compiled as a module and #ifndef CONFIG_ACPI_PROCESSOR path is chosen. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/Kconfig | 19 ++----------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ---------------------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 5 +---- 3 files changed, 3 insertions(+), 49 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index 65792c2cc462..52c839875478 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -87,30 +87,15 @@ config X86_POWERNOW_K7_ACPI config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" select CPU_FREQ_TABLE + depends on ACPI && ACPI_PROCESSOR help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. To compile this driver as a module, choose M here: the module will be called powernow-k8. For details, take a look at . - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool - prompt "ACPI Support" if X86_32 - depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) - default y - help - This provides access to the K8s Processor Performance States via ACPI. - This driver is probably required for CPUFreq to work with multi-socket and - SMP systems. It is not required on at least some single-socket yet - multi-core systems, even if SMP is enabled. - - It is safe to say Y here. - config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" depends on X86_32 && PCI diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4dd7e3bdee23..acc06b03194e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -38,11 +38,9 @@ #include -#ifdef CONFIG_X86_POWERNOW_K8_ACPI #include #include #include -#endif #define PFX "powernow-k8: " #define VERSION "version 2.20.00" @@ -800,7 +798,6 @@ static int find_psb_table(struct powernow_k8_data *data) return -ENODEV; } -#ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { @@ -1030,23 +1027,6 @@ static int get_transition_latency(struct powernow_k8_data *data) return 1000 * max_latency; } -#else -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) -{ - return -ENODEV; -} -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) -{ - return; -} -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, - unsigned int index) -{ - return; -} -static int get_transition_latency(struct powernow_k8_data *data) { return 0; } -#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ - /* Take a frequency, and issue the fid/vid transition command */ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) @@ -1260,19 +1240,11 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { -#ifndef CONFIG_ACPI_PROCESSOR - printk(KERN_ERR PFX - "ACPI Processor support is required for " - "SMP systems but is absent. Please load the " - "ACPI Processor module before starting this " - "driver.\n"); -#else printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" " ACPI _PSS objects in a way that Linux " "understands. Please report this to the Linux " "ACPI maintainers and complain to your BIOS " "vendor.\n"); -#endif kfree(data); return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 8ecc75b6c7c3..6c6698feade1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -45,11 +45,10 @@ struct powernow_k8_data { * frequency is in kHz */ struct cpufreq_frequency_table *powernow_table; -#ifdef CONFIG_X86_POWERNOW_K8_ACPI /* the acpi table needs to be kept. it's only available if ACPI was * used to determine valid frequency/vid/fid states */ struct acpi_processor_performance acpi_data; -#endif + /* we need to keep track of associated cores, but let cpufreq * handle hotplug events - so just point at cpufreq pol->cpus * structure */ @@ -222,10 +221,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); -#ifdef CONFIG_X86_POWERNOW_K8_ACPI static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -#endif #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -- cgit v1.2.3 From 79cc56af9fdbeaa91f50289b932d0959b41f9467 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 4 Feb 2009 11:56:11 +0100 Subject: [CPUFREQ] powernow-k8: Only print error message once, not per core. This is the typical message you get if you plug in a CPU which is newer than your BIOS. It's annoying seeing this message for each core. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index acc06b03194e..c44853fc827a 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -794,7 +794,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -1218,6 +1218,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) struct powernow_k8_data *data; cpumask_t oldmask; int rc; + static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1240,11 +1241,19 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" - " ACPI _PSS objects in a way that Linux " - "understands. Please report this to the Linux " - "ACPI maintainers and complain to your BIOS " - "vendor.\n"); + /* + * Replace this one with print_once as soon as such a + * thing gets introduced + */ + if (!print_once) { + WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS " + "does not provide ACPI _PSS objects " + "in a way that Linux understands. " + "Please report this to the Linux ACPI" + " maintainers and complain to your " + "BIOS vendor.\n"); + print_once++; + } kfree(data); return -ENODEV; } -- cgit v1.2.3 From 3a58df35a64a1e0ac32c30ea629a513dec2fe711 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:36:14 -0500 Subject: [CPUFREQ] checkpatch cleanups for acpi-cpufreq Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c3..3babe1f1e912 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -1,5 +1,5 @@ /* - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) + * acpi-cpufreq.c - ACPI Processor P-States Driver * * Copyright (C) 2001, 2002 Andy Grover * Copyright (C) 2001, 2002 Paul Diefenbaugh @@ -36,16 +36,18 @@ #include #include +#include +#include +#include + #include -#include #include #include #include -#include -#include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); @@ -95,7 +97,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) perf = data->acpi_data; - for (i=0; istate_count; i++) { + for (i = 0; i < perf->state_count; i++) { if (value == perf->states[i].status) return data->freq_table[i].frequency; } @@ -110,7 +112,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) msr &= INTEL_MSR_RANGE; perf = data->acpi_data; - for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { if (msr == perf->states[data->freq_table[i].index].status) return data->freq_table[i].frequency; } @@ -138,15 +140,13 @@ struct io_addr { u8 bit_width; }; -typedef union { - struct msr_addr msr; - struct io_addr io; -} drv_addr_union; - struct drv_cmd { unsigned int type; const struct cpumask *mask; - drv_addr_union addr; + union { + struct msr_addr msr; + struct io_addr io; + } addr; u32 val; }; @@ -369,7 +369,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, unsigned int cur_freq; unsigned int i; - for (i=0; i<100; i++) { + for (i = 0; i < 100; i++) { cur_freq = extract_freq(get_cur_val(mask), data); if (cur_freq == freq) return 1; @@ -494,7 +494,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) unsigned long freq; unsigned long freqn = perf->states[0].core_frequency * 1000; - for (i=0; i<(perf->state_count-1); i++) { + for (i = 0; i < (perf->state_count-1); i++) { freq = freqn; freqn = perf->states[i+1].core_frequency * 1000; if ((2 * cpu_khz) > (freqn + freq)) { @@ -673,7 +673,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* detect transition latency */ policy->cpuinfo.transition_latency = 0; - for (i=0; istate_count; i++) { + for (i = 0; i < perf->state_count; i++) { if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) policy->cpuinfo.transition_latency = @@ -682,8 +682,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ - for (i=0; istate_count; i++) { - if (i>0 && perf->states[i].core_frequency >= + for (i = 0; i < perf->state_count; i++) { + if (i > 0 && perf->states[i].core_frequency >= data->freq_table[valid_states-1].frequency / 1000) continue; -- cgit v1.2.3 From 91420220d278584693d11a800b78fdc20e8fe10e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 4 Feb 2009 15:28:54 -0500 Subject: [CPUFREQ] Use swap() in longhaul.c Remove hand-coded implementation of swap() Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index dc03622a83a0..f1c51aea064d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -511,13 +511,10 @@ static int __init longhaul_get_ranges(void) } } if (min_i != j) { - unsigned int temp; - temp = longhaul_table[j].frequency; - longhaul_table[j].frequency = longhaul_table[min_i].frequency; - longhaul_table[min_i].frequency = temp; - temp = longhaul_table[j].index; - longhaul_table[j].index = longhaul_table[min_i].index; - longhaul_table[min_i].index = temp; + swap(longhaul_table[j].frequency, + longhaul_table[min_i].frequency); + swap(longhaul_table[j].index, + longhaul_table[min_i].index); } } -- cgit v1.2.3 From de3ed81d746d7cfc22a0c645244ed9fa71e4a833 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 18 Feb 2009 18:28:22 +0000 Subject: [CPUFREQ] Change link order of x86 cpufreq modules Change the link order of the cpufreq modules to ensure that they're probed in the preferred order when statically linked in. Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 560f7760dae5..509296df294d 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile @@ -1,6 +1,11 @@ +# Link order matters. K8 is preferred to ACPI because of firmware bugs in early +# K8 systems. ACPI is preferred to all other hardware-specific drivers. +# speedstep-* is preferred over p4-clockmod. + +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o @@ -10,7 +15,6 @@ obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o -- cgit v1.2.3 From 0cb8bc256093e716d2a0a4a721f36c625a3f7634 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 18 Feb 2009 14:11:00 -0500 Subject: [CPUFREQ] powernow-k8: Use a common exit path. a0abd520fd69295f4a3735e29a9448a32e101d47 introduced a slew of extra kfree/return -ENODEV pairs. This replaces them all with gotos. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c44853fc827a..a15ac94e0b9b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1254,21 +1254,18 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "BIOS vendor.\n"); print_once++; } - kfree(data); - return -ENODEV; + goto err_out; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - kfree(data); - return -ENODEV; + goto err_out; } rc = find_psb_table(data); - if (rc) { - kfree(data); - return -ENODEV; - } + if (rc) + goto err_out; + /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ pol->cpuinfo.transition_latency = ( @@ -1283,16 +1280,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; + goto err_out_unmask; } if (pending_bit_stuck()) { printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out; + goto err_out_unmask; } if (query_current_values_with_pending_wait(data)) - goto err_out; + goto err_out_unmask; if (cpu_family == CPU_OPTERON) fidvid_msr_init(); @@ -1335,10 +1332,11 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; -err_out: +err_out_unmask: set_cpus_allowed_ptr(current, &oldmask); powernow_k8_cpu_exit_acpi(data); +err_out: kfree(data); return -ENODEV; } -- cgit v1.2.3 From 199785eac892a1fa1b71cc22bec58e8b156d9311 Mon Sep 17 00:00:00 2001 From: Matthias-Christian Ott Date: Fri, 20 Feb 2009 20:52:17 -0500 Subject: [CPUFREQ] p4-clockmod reports wrong frequency. http://bugzilla.kernel.org/show_bug.cgi?id=10968 [ Updated for current tree, and fixed compile failure when p4-clockmod was built modular -- davej] From: Matthias-Christian Ott Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/x86/include/asm/timer.h | 2 +- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 7 ++++++ arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 34 +++++++++++++++++------------ arch/x86/kernel/tsc.c | 3 --- 4 files changed, 28 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 2bb6a835c453..4f5c24724856 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -11,8 +11,8 @@ unsigned long native_calibrate_tsc(void); #ifdef CONFIG_X86_32 extern int timer_ack; +#endif extern int recalibrate_cpu_khz(void); -#endif /* CONFIG_X86_32 */ extern int no_timer_check; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 46a2a7a53148..1778402305e0 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -31,6 +31,7 @@ #include #include +#include #include "speedstep-lib.h" @@ -224,6 +225,12 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) dprintk("has errata -- disabling low frequencies\n"); } + if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && + c->x86_model < 2) { + /* switch to maximum frequency and measure result */ + cpufreq_p4_setdc(policy->cpu, DC_DISABLE); + recalibrate_cpu_khz(); + } /* get max frequency */ stock_freq = cpufreq_p4_get_frequency(c); if (!stock_freq) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 55c696daa05c..2e3c6862657b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -16,6 +16,7 @@ #include #include +#include #include "speedstep-lib.h" #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ @@ -178,6 +179,15 @@ static unsigned int pentium4_get_frequency(void) u32 msr_lo, msr_hi, mult; unsigned int fsb = 0; unsigned int ret; + u8 fsb_code; + + /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency + * to System Bus Frequency Ratio Field in the Processor Frequency + * Configuration Register of the MSR. Therefore the current + * frequency cannot be calculated and has to be measured. + */ + if (c->x86_model < 2) + return cpu_khz; rdmsr(0x2c, msr_lo, msr_hi); @@ -188,21 +198,17 @@ static unsigned int pentium4_get_frequency(void) * revision #12 in Table B-1: MSRs in the Pentium 4 and * Intel Xeon Processors, on page B-4 and B-5. */ - if (c->x86_model < 2) + fsb_code = (msr_lo >> 16) & 0x7; + switch (fsb_code) { + case 0: fsb = 100 * 1000; - else { - u8 fsb_code = (msr_lo >> 16) & 0x7; - switch (fsb_code) { - case 0: - fsb = 100 * 1000; - break; - case 1: - fsb = 13333 * 10; - break; - case 2: - fsb = 200 * 1000; - break; - } + break; + case 1: + fsb = 13333 * 10; + break; + case 2: + fsb = 200 * 1000; + break; } if (!fsb) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e58168631..5ad22f8f5f3a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -523,8 +523,6 @@ unsigned long native_calibrate_tsc(void) return tsc_pit_min; } -#ifdef CONFIG_X86_32 -/* Only called from the Powernow K7 cpu freq driver */ int recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP @@ -546,7 +544,6 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); -#endif /* CONFIG_X86_32 */ /* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) -- cgit v1.2.3 From eb3092cee79e4efa5d3e9c81c7e6ca90318cebb8 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Sat, 21 Feb 2009 01:58:47 +0000 Subject: [CPUFREQ] Make cpufreq-nforce2 less obnoxious Not owning an nforce2 is a sign of good taste, not an error. Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 99262906838c..733093d60436 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -424,7 +424,7 @@ static int __init nforce2_init(void) /* detect chipset */ if (nforce2_detect_chipset()) { - printk(KERN_ERR PFX "No nForce2 chipset.\n"); + printk(KERN_INFO PFX "No nForce2 chipset.\n"); return -ENODEV; } -- cgit v1.2.3 From 83ce400928680a6c8123d492684b27857f5a2d95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 20:16:58 +0100 Subject: x86: set X86_FEATURE_TSC_RELIABLE If the TSC is constant and non-stop, also set it reliable. (We will turn this off in DMI quirks for multi-chassis systems) The performance number on a 16-way Nehalem system running 32 tasks that context-switch between each other is significant: sched_clock_stable=0 sched_clock_stable=1 .................... .................... 22.456925 million/sec 24.306972 million/sec [+8.2%] lmbench's "lat_ctx -s 0 2" goes from 0.63 microseconds to 0.59 microseconds - a 6.7% increase in context-switching performance. Perfstat of 1 million pipe context switches between two tasks: Performance counter stats for './pipe-test-1m': [before] [after] ............ ............ 37621.421089 36436.848378 task clock ticks (msecs) 0 0 CPU migrations (events) 2000274 2000189 context switches (events) 194 193 pagefaults (events) 8433799643 8171016416 CPU cycles (events) -3.21% 8370133368 8180999694 instructions (events) -2.31% 4158565 3895941 cache references (events) -6.74% 44312 46264 cache misses (events) 2349.287976 2279.362465 wall-time (msecs) -3.06% The speedup comes straight from the reduction in the instruction count. sched_clock_cpu() got simpler and the whole workload thus executes faster. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 24ff26a38ade..5fff00c70de0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate - * with P/T states and does not stop in deep C-states + * with P/T states and does not stop in deep C-states. + * + * It is also reliable across cores and sockets. (but not across + * cabinets - we turn it off in that case explicitly.) */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); + sched_clock_stable = 1; } } -- cgit v1.2.3 From 36e8abf3edcd2d207193ec5741d1a2a645d470a5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 5 Mar 2009 00:16:26 -0500 Subject: [CPUFREQ] Prevent p4-clockmod from auto-binding to the ondemand governor. The latency of p4-clockmod sucks so hard that scaling on a regular basis with ondemand is a really bad idea. Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 1778402305e0..352cf9a49164 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -246,7 +246,10 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = 1000000; /* assumed */ + + /* the transition latency is set to be 1 higher than the maximum + * transition latency of the ondemand governor */ + policy->cpuinfo.transition_latency = 10000001; policy->cur = stock_freq; return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); -- cgit v1.2.3 From 5f812de63ce515265ebd84e932c762136924ab84 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 6 Jan 2009 00:17:09 +0900 Subject: AMD IOMMU: remove unnecessary ifdef We try to avoid this type of ifdef and we can safely remove this ifdef. Signed-off-by: FUJITA Tomonori Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5113c080f0c4..d7a7c4c063ff 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -23,9 +23,7 @@ #include #include #include -#ifdef CONFIG_IOMMU_API #include -#endif #include #include #include -- cgit v1.2.3 From 6074d5b0a319fe8400ff079a3c289406ca024321 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: percpu: more flexibility for @dyn_size of pcpu_setup_first_chunk() Impact: cleanup, more flexibility for first chunk init Non-negative @dyn_size used to be allowed iff @unit_size wasn't auto. This restriction stemmed from implementation detail and made things a bit less intuitive. This patch allows @dyn_size to be specified regardless of @unit_size and swaps the positions of @dyn_size and @unit_size so that the parameter order makes more sense (static, reserved and dyn sizes followed by enclosing unit_size). While at it, add @unit_size >= PCPU_MIN_UNIT_SIZE sanity check. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 13 ++++++------- include/linux/percpu.h | 2 +- mm/percpu.c | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index efa615f2bf43..e41c51f6ada1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -233,8 +233,8 @@ proceed: "%zu bytes\n", vm.addr, static_size); ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, dyn_size, vm.addr, NULL); + PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, vm.addr, NULL); goto out_free_ar; enomem: @@ -315,9 +315,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); return pcpu_setup_first_chunk(pcpue_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, - pcpue_unit_size, dyn_size, - pcpue_ptr, NULL); + PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + pcpue_unit_size, pcpue_ptr, NULL); } /* @@ -375,8 +374,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pcpu4k_nr_static_pages, static_size); ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, - pcpu4k_populate_pte); + PERCPU_FIRST_CHUNK_RESERVE, -1, + -1, NULL, pcpu4k_populate_pte); goto out_free_ar; enomem: diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 54a968b4b924..fb455dcc59c7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -107,7 +107,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, + ssize_t dyn_size, ssize_t unit_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); diff --git a/mm/percpu.c b/mm/percpu.c index c6f38a2aface..2f94661d3e36 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1027,8 +1027,8 @@ EXPORT_SYMBOL_GPL(free_percpu); * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * @base_addr: mapped address, NULL for auto * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @@ -1053,14 +1053,14 @@ EXPORT_SYMBOL_GPL(free_percpu); * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * + * @dyn_size, if non-negative, determines the number of bytes + * available for dynamic allocation in the first chunk. Specifying + * non-negative value makes percpu leave alone the area beyond + * @static_size + @reserved_size + @dyn_size. + * * @unit_size, if non-negative, specifies unit size and must be * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + @dyn_size. - * - * @dyn_size, if non-negative, limits the number of bytes available - * for dynamic allocation in the first chunk. Specifying non-negative - * value make percpu leave alone the area beyond @static_size + - * @reserved_size + @dyn_size. + * @reserved_size + if non-negative, @dyn_size. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -1083,12 +1083,14 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, + ssize_t dyn_size, ssize_t unit_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; static int smap[2], dmap[2]; + size_t size_sum = static_size + reserved_size + + (dyn_size >= 0 ? dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; int nr_pages; @@ -1099,20 +1101,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); if (unit_size >= 0) { - BUG_ON(unit_size < static_size + reserved_size + - (dyn_size >= 0 ? dyn_size : 0)); + BUG_ON(unit_size < size_sum); BUG_ON(unit_size & ~PAGE_MASK); - } else { - BUG_ON(dyn_size >= 0); + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + } else BUG_ON(base_addr); - } BUG_ON(base_addr && populate_pte_fn); if (unit_size >= 0) pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size + reserved_size)); + PFN_UP(size_sum)); pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; -- cgit v1.2.3 From 66c3a75772247c31feabefb724e082220a1ab060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: percpu: generalize embedding first chunk setup helper Impact: code reorganization Separate out embedding first chunk setup helper from x86 embedding first chunk allocator and put it in mm/percpu.c. This will be used by the default percpu first chunk allocator and possibly by other archs. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 54 +++----------------------- include/linux/percpu.h | 4 ++ mm/percpu.c | 86 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e41c51f6ada1..400331b50a53 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * module and dynamic reserves, and allocated as a contiguous area - * using bootmem allocator and used as-is without being mapped into - * vmalloc area. This enables the first chunk to piggy back on the - * linear physical PMD mapping and doesn't add any additional pressure - * to TLB. Note that if the needed size is smaller than the minimum - * unit size, the leftover is returned to the bootmem allocator. + * module and dynamic reserves and embedded into linear physical + * mapping so that it can use PMD mapping without additional TLB + * pressure. */ -static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); -} - static ssize_t __init setup_pcpu_embed(size_t static_size) { - unsigned int cpu; - size_t dyn_size; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; /* * If large page isn't supported, there's no benefit in doing @@ -291,32 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) if (!cpu_has_pse || pcpu_need_numa()) return -EINVAL; - /* allocate and copy */ - pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, - PAGE_SIZE); - if (!pcpue_ptr) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; - - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); - memcpy(ptr, __per_cpu_load, static_size); - } - - /* we're ready, commit */ - pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - pcpue_unit_size, pcpue_ptr, NULL); + return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb455dcc59c7..ee5615d65211 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -111,6 +111,10 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); +extern ssize_t __init pcpu_embed_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, ssize_t unit_size); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index 2f94661d3e36..1aa5d8fbca12 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1238,3 +1238,89 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); return pcpu_unit_size; } + +/* + * Embedding first chunk setup helper. + */ +static void *pcpue_ptr __initdata; +static size_t pcpue_size __initdata; +static size_t pcpue_unit_size __initdata; + +static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpue_size) + return NULL; + + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); +} + +/** + * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto + * + * This is a helper to ease setting up embedded first percpu chunk and + * can be called where pcpu_setup_first_chunk() is expected. + * + * If this function is used to setup the first chunk, it is allocated + * as a contiguous area using bootmem allocator and used as-is without + * being mapped into vmalloc area. This enables the first chunk to + * piggy back on the linear physical mapping which often uses larger + * page size. + * + * When @dyn_size is positive, dynamic area might be larger than + * specified to fill page alignment. Also, when @dyn_size is auto, + * @dyn_size does not fill the whole first chunk but only what's + * necessary for page alignment after static and reserved areas. + * + * If the needed size is smaller than the minimum or specified unit + * size, the leftover is returned to the bootmem allocator. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, + ssize_t dyn_size, ssize_t unit_size) +{ + unsigned int cpu; + + /* determine parameters and allocate */ + pcpue_size = PFN_ALIGN(static_size + reserved_size + + (dyn_size >= 0 ? dyn_size : 0)); + if (dyn_size != 0) + dyn_size = pcpue_size - static_size - reserved_size; + + if (unit_size >= 0) { + BUG_ON(unit_size < pcpue_size); + pcpue_unit_size = unit_size; + } else + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); + + pcpue_ptr = __alloc_bootmem_nopanic( + num_possible_cpus() * pcpue_unit_size, + PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!pcpue_ptr) + return -ENOMEM; + + /* return the leftover and copy */ + for_each_possible_cpu(cpu) { + void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + + free_bootmem(__pa(ptr + pcpue_size), + pcpue_unit_size - pcpue_size); + memcpy(ptr, __per_cpu_load, static_size); + } + + /* we're ready, commit */ + pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", + pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + + return pcpu_setup_first_chunk(pcpue_get_page, static_size, + reserved_size, dyn_size, + pcpue_unit_size, pcpue_ptr, NULL); +} -- cgit v1.2.3 From 8c5dfd25519bf302ba43daa59976c4d675a594a7 Mon Sep 17 00:00:00 2001 From: Stoyan Gaydarov Date: Tue, 10 Mar 2009 00:10:32 -0500 Subject: x86: BUG to BUG_ON changes Impact: cleanup Signed-off-by: Stoyan Gaydarov LKML-Reference: <1236661850-8237-8-git-send-email-stoyboyker@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 6 ++---- arch/x86/kernel/quirks.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 826d5c876278..f8869978bbb7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1078,8 +1078,7 @@ void __cpuinit cpu_init(void) atomic_inc(&init_mm.mm_count); me->active_mm = &init_mm; - if (me->mm) - BUG(); + BUG_ON(me->mm); enter_lazy_tlb(&init_mm, me); load_sp0(t, ¤t->thread); @@ -1145,8 +1144,7 @@ void __cpuinit cpu_init(void) */ atomic_inc(&init_mm.mm_count); curr->active_mm = &init_mm; - if (curr->mm) - BUG(); + BUG_ON(curr->mm); enter_lazy_tlb(&init_mm, curr); load_sp0(t, thread); diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 309949e9e1c1..6a5a2970f4c5 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void) if (!force_hpet_address) return; - if (rcba_base == NULL) - BUG(); + BUG_ON(rcba_base == NULL); /* read the Function Disable register, dword mode only */ val = readl(rcba_base + 0x3404); -- cgit v1.2.3 From 9b779edf4b97798d037bb44fca2719ac184d0f14 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 10 Mar 2009 15:37:51 +0530 Subject: x86: cpu architecture debug code Introduce: cat /sys/kernel/debug/x86/cpu/* for Intel and AMD processors to view / debug the state of each CPU. By using this we can debug whole range of registers and other cpu information for debugging purpose and monitor how things are changing. This can be useful for developers as well as for users. Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1236701373.3387.4.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 + arch/x86/include/asm/cpu_debug.h | 193 ++++++++++ arch/x86/kernel/cpu/Makefile | 2 + arch/x86/kernel/cpu/cpu_debug.c | 784 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 985 insertions(+) create mode 100755 arch/x86/include/asm/cpu_debug.h create mode 100755 arch/x86/kernel/cpu/cpu_debug.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 31758378bcd2..03f0a3aa43b1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -931,6 +931,12 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. +config X86_CPU_DEBUG + tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support" + ---help--- + If you select this option, this will provide various x86 CPUs + information through debugfs. + choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h new file mode 100755 index 000000000000..d24d64fcee04 --- /dev/null +++ b/arch/x86/include/asm/cpu_debug.h @@ -0,0 +1,193 @@ +#ifndef _ASM_X86_CPU_DEBUG_H +#define _ASM_X86_CPU_DEBUG_H + +/* + * CPU x86 architecture debug + * + * Copyright(C) 2009 Jaswinder Singh Rajput + */ + +/* Register flags */ +enum cpu_debug_bit { +/* Model Specific Registers (MSRs) */ + CPU_MC_BIT, /* Machine Check */ + CPU_MONITOR_BIT, /* Monitor */ + CPU_TIME_BIT, /* Time */ + CPU_PMC_BIT, /* Performance Monitor */ + CPU_PLATFORM_BIT, /* Platform */ + CPU_APIC_BIT, /* APIC */ + CPU_POWERON_BIT, /* Power-on */ + CPU_CONTROL_BIT, /* Control */ + CPU_FEATURES_BIT, /* Features control */ + CPU_LBRANCH_BIT, /* Last Branch */ + CPU_BIOS_BIT, /* BIOS */ + CPU_FREQ_BIT, /* Frequency */ + CPU_MTTR_BIT, /* MTRR */ + CPU_PERF_BIT, /* Performance */ + CPU_CACHE_BIT, /* Cache */ + CPU_SYSENTER_BIT, /* Sysenter */ + CPU_THERM_BIT, /* Thermal */ + CPU_MISC_BIT, /* Miscellaneous */ + CPU_DEBUG_BIT, /* Debug */ + CPU_PAT_BIT, /* PAT */ + CPU_VMX_BIT, /* VMX */ + CPU_CALL_BIT, /* System Call */ + CPU_BASE_BIT, /* BASE Address */ + CPU_SMM_BIT, /* System mgmt mode */ + CPU_SVM_BIT, /*Secure Virtual Machine*/ + CPU_OSVM_BIT, /* OS-Visible Workaround*/ +/* Standard Registers */ + CPU_TSS_BIT, /* Task Stack Segment */ + CPU_CR_BIT, /* Control Registers */ + CPU_DT_BIT, /* Descriptor Table */ +/* End of Registers flags */ + CPU_REG_ALL_BIT, /* Select all Registers */ +}; + +#define CPU_REG_ALL (~0) /* Select all Registers */ + +#define CPU_MC (1 << CPU_MC_BIT) +#define CPU_MONITOR (1 << CPU_MONITOR_BIT) +#define CPU_TIME (1 << CPU_TIME_BIT) +#define CPU_PMC (1 << CPU_PMC_BIT) +#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT) +#define CPU_APIC (1 << CPU_APIC_BIT) +#define CPU_POWERON (1 << CPU_POWERON_BIT) +#define CPU_CONTROL (1 << CPU_CONTROL_BIT) +#define CPU_FEATURES (1 << CPU_FEATURES_BIT) +#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT) +#define CPU_BIOS (1 << CPU_BIOS_BIT) +#define CPU_FREQ (1 << CPU_FREQ_BIT) +#define CPU_MTRR (1 << CPU_MTTR_BIT) +#define CPU_PERF (1 << CPU_PERF_BIT) +#define CPU_CACHE (1 << CPU_CACHE_BIT) +#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT) +#define CPU_THERM (1 << CPU_THERM_BIT) +#define CPU_MISC (1 << CPU_MISC_BIT) +#define CPU_DEBUG (1 << CPU_DEBUG_BIT) +#define CPU_PAT (1 << CPU_PAT_BIT) +#define CPU_VMX (1 << CPU_VMX_BIT) +#define CPU_CALL (1 << CPU_CALL_BIT) +#define CPU_BASE (1 << CPU_BASE_BIT) +#define CPU_SMM (1 << CPU_SMM_BIT) +#define CPU_SVM (1 << CPU_SVM_BIT) +#define CPU_OSVM (1 << CPU_OSVM_BIT) +#define CPU_TSS (1 << CPU_TSS_BIT) +#define CPU_CR (1 << CPU_CR_BIT) +#define CPU_DT (1 << CPU_DT_BIT) + +/* Register file flags */ +enum cpu_file_bit { + CPU_INDEX_BIT, /* index */ + CPU_VALUE_BIT, /* value */ +}; + +#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) + +/* + * DisplayFamily_DisplayModel Processor Families/Processor Number Series + * -------------------------- ------------------------------------------ + * 05_01, 05_02, 05_04 Pentium, Pentium with MMX + * + * 06_01 Pentium Pro + * 06_03, 06_05 Pentium II Xeon, Pentium II + * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III + * + * 06_09, 060D Pentium M + * + * 06_0E Core Duo, Core Solo + * + * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series, + * Core 2 Quad, Core 2 Extreme, Core 2 Duo, + * Pentium dual-core + * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650 + * + * 06_1C Atom + * + * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4 + * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D + * + * 0F_06 Xeon 7100, 5000 Series, Xeon MP, + * Pentium 4, Pentium D + */ + +/* Register processors bits */ +enum cpu_processor_bit { + CPU_NONE, +/* Intel */ + CPU_INTEL_PENTIUM_BIT, + CPU_INTEL_P6_BIT, + CPU_INTEL_PENTIUM_M_BIT, + CPU_INTEL_CORE_BIT, + CPU_INTEL_CORE2_BIT, + CPU_INTEL_ATOM_BIT, + CPU_INTEL_XEON_P4_BIT, + CPU_INTEL_XEON_MP_BIT, +}; + +#define CPU_ALL (~0) /* Select all CPUs */ + +#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) +#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) +#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) +#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT) +#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT) +#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT) +#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT) +#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT) + +#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M) +#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2) +#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP) +#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM) +#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM) +#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM) +#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON) +#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON) +#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT) +#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON) +#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON) +#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT) +#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX) +#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE) +#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE) +#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT) +#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE) +#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) +#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) + +/* Select all Intel CPUs*/ +#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) + +#define MAX_CPU_FILES 512 + +struct cpu_private { + unsigned cpu; + unsigned type; + unsigned reg; + unsigned file; +}; + +struct cpu_debug_base { + char *name; /* Register name */ + unsigned flag; /* Register flag */ +}; + +struct cpu_cpuX_base { + struct dentry *dentry; /* Register dentry */ + int init; /* Register index file */ +}; + +struct cpu_file_base { + char *name; /* Register file name */ + unsigned flag; /* Register file flag */ +}; + +struct cpu_debug_range { + unsigned min; /* Register range min */ + unsigned max; /* Register range max */ + unsigned flag; /* Supported flags */ + unsigned model; /* Supported models */ +}; + +#endif /* _ASM_X86_CPU_DEBUG_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82db7f45e2de..d4356f8b7522 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -14,6 +14,8 @@ obj-y += vmware.o hypervisor.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o +obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o + obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c new file mode 100755 index 000000000000..0bdf4daba205 --- /dev/null +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -0,0 +1,784 @@ +/* + * CPU x86 architecture debug code + * + * Copyright(C) 2009 Jaswinder Singh Rajput + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); +static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); +static DEFINE_PER_CPU(unsigned, cpu_modelflag); +static DEFINE_PER_CPU(int, cpu_priv_count); +static DEFINE_PER_CPU(unsigned, cpu_model); + +static DEFINE_MUTEX(cpu_debug_lock); + +static struct dentry *cpu_debugfs_dir; + +static struct cpu_debug_base cpu_base[] = { + { "mc", CPU_MC }, /* Machine Check */ + { "monitor", CPU_MONITOR }, /* Monitor */ + { "time", CPU_TIME }, /* Time */ + { "pmc", CPU_PMC }, /* Performance Monitor */ + { "platform", CPU_PLATFORM }, /* Platform */ + { "apic", CPU_APIC }, /* APIC */ + { "poweron", CPU_POWERON }, /* Power-on */ + { "control", CPU_CONTROL }, /* Control */ + { "features", CPU_FEATURES }, /* Features control */ + { "lastbranch", CPU_LBRANCH }, /* Last Branch */ + { "bios", CPU_BIOS }, /* BIOS */ + { "freq", CPU_FREQ }, /* Frequency */ + { "mtrr", CPU_MTRR }, /* MTRR */ + { "perf", CPU_PERF }, /* Performance */ + { "cache", CPU_CACHE }, /* Cache */ + { "sysenter", CPU_SYSENTER }, /* Sysenter */ + { "therm", CPU_THERM }, /* Thermal */ + { "misc", CPU_MISC }, /* Miscellaneous */ + { "debug", CPU_DEBUG }, /* Debug */ + { "pat", CPU_PAT }, /* PAT */ + { "vmx", CPU_VMX }, /* VMX */ + { "call", CPU_CALL }, /* System Call */ + { "base", CPU_BASE }, /* BASE Address */ + { "smm", CPU_SMM }, /* System mgmt mode */ + { "svm", CPU_SVM }, /*Secure Virtial Machine*/ + { "osvm", CPU_OSVM }, /* OS-Visible Workaround*/ + { "tss", CPU_TSS }, /* Task Stack Segment */ + { "cr", CPU_CR }, /* Control Registers */ + { "dt", CPU_DT }, /* Descriptor Table */ + { "registers", CPU_REG_ALL }, /* Select all Registers */ +}; + +static struct cpu_file_base cpu_file[] = { + { "index", CPU_REG_ALL }, /* index */ + { "value", CPU_REG_ALL }, /* value */ +}; + +/* Intel Registers Range */ +static struct cpu_debug_range cpu_intel_range[] = { + { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, + { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, + { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, + { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, + { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, + { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, + + { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, + { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, + { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, + { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, + + { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, + { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, + { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, + { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, + + { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, + { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, + { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, + { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, + + { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, + { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, + { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, + { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, + + { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, + { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, + { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, + { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, + { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, + + { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, + { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, + { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, + { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, + { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE }, + { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, + { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, + { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, + + { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, + { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, + { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, + { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, + { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, + { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, + { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, + + { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, + { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, + { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, + + { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, + { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, + { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, + { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, + { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, + { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, + { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, + { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, + { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, + + { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, + { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, + { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, + { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, + { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, + { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, + { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, + { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE }, + + { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE }, + { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON }, + { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON }, + + { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON }, + { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON }, + { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON }, + { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON }, +}; + +/* AMD Registers Range */ +static struct cpu_debug_range cpu_amd_range[] = { + { 0x00000010, 0x00000010, CPU_TIME, CPU_ALL, }, + { 0x0000001B, 0x0000001B, CPU_APIC, CPU_ALL, }, + { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_ALL, }, + + { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_ALL, }, + { 0x00000179, 0x0000017A, CPU_MC, CPU_ALL, }, + { 0x0000017B, 0x0000017B, CPU_MC, CPU_ALL, }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_ALL, }, + { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_ALL, }, + + { 0x00000200, 0x0000020F, CPU_MTRR, CPU_ALL, }, + { 0x00000250, 0x00000250, CPU_MTRR, CPU_ALL, }, + { 0x00000258, 0x00000259, CPU_MTRR, CPU_ALL, }, + { 0x00000268, 0x0000026F, CPU_MTRR, CPU_ALL, }, + { 0x00000277, 0x00000277, CPU_PAT, CPU_ALL, }, + { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_ALL, }, + + { 0x00000400, 0x00000417, CPU_MC, CPU_ALL, }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_ALL, }, + { 0xC0000081, 0xC0000084, CPU_CALL, CPU_ALL, }, + { 0xC0000100, 0xC0000102, CPU_BASE, CPU_ALL, }, + { 0xC0000103, 0xC0000103, CPU_TIME, CPU_ALL, }, + + { 0xC0000408, 0xC000040A, CPU_MC, CPU_ALL, }, + + { 0xc0010000, 0xc0010007, CPU_PMC, CPU_ALL, }, + { 0xc0010010, 0xc0010010, CPU_MTRR, CPU_ALL, }, + { 0xc0010016, 0xc001001A, CPU_MTRR, CPU_ALL, }, + { 0xc001001D, 0xc001001D, CPU_MTRR, CPU_ALL, }, + { 0xc0010030, 0xc0010035, CPU_BIOS, CPU_ALL, }, + { 0xc0010056, 0xc0010056, CPU_SMM, CPU_ALL, }, + { 0xc0010061, 0xc0010063, CPU_SMM, CPU_ALL, }, + { 0xc0010074, 0xc0010074, CPU_MC, CPU_ALL, }, + { 0xc0010111, 0xc0010113, CPU_SMM, CPU_ALL, }, + { 0xc0010114, 0xc0010118, CPU_SVM, CPU_ALL, }, + { 0xc0010119, 0xc001011A, CPU_SMM, CPU_ALL, }, + { 0xc0010140, 0xc0010141, CPU_OSVM, CPU_ALL, }, + { 0xc0010156, 0xc0010156, CPU_SMM, CPU_ALL, }, +}; + + +static int get_cpu_modelflag(unsigned cpu) +{ + int flag; + + switch (per_cpu(cpu_model, cpu)) { + /* Intel */ + case 0x0501: + case 0x0502: + case 0x0504: + flag = CPU_INTEL_PENTIUM; + break; + case 0x0601: + case 0x0603: + case 0x0605: + case 0x0607: + case 0x0608: + case 0x060A: + case 0x060B: + flag = CPU_INTEL_P6; + break; + case 0x0609: + case 0x060D: + flag = CPU_INTEL_PENTIUM_M; + break; + case 0x060E: + flag = CPU_INTEL_CORE; + break; + case 0x060F: + case 0x0617: + flag = CPU_INTEL_CORE2; + break; + case 0x061C: + flag = CPU_INTEL_ATOM; + break; + case 0x0F00: + case 0x0F01: + case 0x0F02: + case 0x0F03: + case 0x0F04: + flag = CPU_INTEL_XEON_P4; + break; + case 0x0F06: + flag = CPU_INTEL_XEON_MP; + break; + default: + flag = CPU_NONE; + break; + } + + return flag; +} + +static int get_cpu_range_count(unsigned cpu) +{ + int index; + + switch (per_cpu(cpu_model, cpu) >> 16) { + case X86_VENDOR_INTEL: + index = ARRAY_SIZE(cpu_intel_range); + break; + case X86_VENDOR_AMD: + index = ARRAY_SIZE(cpu_amd_range); + break; + default: + index = 0; + break; + } + + return index; +} + +static int is_typeflag_valid(unsigned cpu, unsigned flag) +{ + unsigned vendor, modelflag; + int i, index; + + /* Standard Registers should be always valid */ + if (flag >= CPU_TSS) + return 1; + + modelflag = per_cpu(cpu_modelflag, cpu); + vendor = per_cpu(cpu_model, cpu) >> 16; + index = get_cpu_range_count(cpu); + + for (i = 0; i < index; i++) { + switch (vendor) { + case X86_VENDOR_INTEL: + if ((cpu_intel_range[i].model & modelflag) && + (cpu_intel_range[i].flag & flag)) + return 1; + break; + case X86_VENDOR_AMD: + if (cpu_amd_range[i].flag & flag) + return 1; + break; + } + } + + /* Invalid */ + return 0; +} + +static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, + int index, unsigned flag) +{ + unsigned modelflag; + + modelflag = per_cpu(cpu_modelflag, cpu); + *max = 0; + switch (per_cpu(cpu_model, cpu) >> 16) { + case X86_VENDOR_INTEL: + if ((cpu_intel_range[index].model & modelflag) && + (cpu_intel_range[index].flag & flag)) { + *min = cpu_intel_range[index].min; + *max = cpu_intel_range[index].max; + } + break; + case X86_VENDOR_AMD: + if (cpu_amd_range[index].flag & flag) { + *min = cpu_amd_range[index].min; + *max = cpu_amd_range[index].max; + } + break; + } + + return *max; +} + +/* This function can also be called with seq = NULL for printk */ +static void print_cpu_data(struct seq_file *seq, unsigned type, + u32 low, u32 high) +{ + struct cpu_private *priv; + u64 val = high; + + if (seq) { + priv = seq->private; + if (priv->file) { + val = (val << 32) | low; + seq_printf(seq, "0x%llx\n", val); + } else + seq_printf(seq, " %08x: %08x_%08x\n", + type, high, low); + } else + printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); +} + +/* This function can also be called with seq = NULL for printk */ +static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) +{ + unsigned msr, msr_min, msr_max; + struct cpu_private *priv; + u32 low, high; + int i, range; + + if (seq) { + priv = seq->private; + if (priv->file) { + if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, + &low, &high)) + print_cpu_data(seq, priv->reg, low, high); + return; + } + } + + range = get_cpu_range_count(cpu); + + for (i = 0; i < range; i++) { + if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) + continue; + + for (msr = msr_min; msr <= msr_max; msr++) { + if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) + continue; + print_cpu_data(seq, msr, low, high); + } + } +} + +static void print_tss(void *arg) +{ + struct pt_regs *regs = task_pt_regs(current); + struct seq_file *seq = arg; + unsigned int seg; + + seq_printf(seq, " RAX\t: %016lx\n", regs->ax); + seq_printf(seq, " RBX\t: %016lx\n", regs->bx); + seq_printf(seq, " RCX\t: %016lx\n", regs->cx); + seq_printf(seq, " RDX\t: %016lx\n", regs->dx); + + seq_printf(seq, " RSI\t: %016lx\n", regs->si); + seq_printf(seq, " RDI\t: %016lx\n", regs->di); + seq_printf(seq, " RBP\t: %016lx\n", regs->bp); + seq_printf(seq, " ESP\t: %016lx\n", regs->sp); + +#ifdef CONFIG_X86_64 + seq_printf(seq, " R08\t: %016lx\n", regs->r8); + seq_printf(seq, " R09\t: %016lx\n", regs->r9); + seq_printf(seq, " R10\t: %016lx\n", regs->r10); + seq_printf(seq, " R11\t: %016lx\n", regs->r11); + seq_printf(seq, " R12\t: %016lx\n", regs->r12); + seq_printf(seq, " R13\t: %016lx\n", regs->r13); + seq_printf(seq, " R14\t: %016lx\n", regs->r14); + seq_printf(seq, " R15\t: %016lx\n", regs->r15); +#endif + + asm("movl %%cs,%0" : "=r" (seg)); + seq_printf(seq, " CS\t: %04x\n", seg); + asm("movl %%ds,%0" : "=r" (seg)); + seq_printf(seq, " DS\t: %04x\n", seg); + seq_printf(seq, " SS\t: %04lx\n", regs->ss); + asm("movl %%es,%0" : "=r" (seg)); + seq_printf(seq, " ES\t: %04x\n", seg); + asm("movl %%fs,%0" : "=r" (seg)); + seq_printf(seq, " FS\t: %04x\n", seg); + asm("movl %%gs,%0" : "=r" (seg)); + seq_printf(seq, " GS\t: %04x\n", seg); + + seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); + + seq_printf(seq, " EIP\t: %016lx\n", regs->ip); +} + +static void print_cr(void *arg) +{ + struct seq_file *seq = arg; + + seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); + seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); + seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); + seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); +#ifdef CONFIG_X86_64 + seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); +#endif +} + +static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) +{ + seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); +} + +static void print_dt(void *seq) +{ + struct desc_ptr dt; + unsigned long ldt; + + /* IDT */ + store_idt((struct desc_ptr *)&dt); + print_desc_ptr("IDT", seq, dt); + + /* GDT */ + store_gdt((struct desc_ptr *)&dt); + print_desc_ptr("GDT", seq, dt); + + /* LDT */ + store_ldt(ldt); + seq_printf(seq, " LDT\t: %016lx\n", ldt); + + /* TR */ + store_tr(ldt); + seq_printf(seq, " TR\t: %016lx\n", ldt); +} + +static void print_dr(void *arg) +{ + struct seq_file *seq = arg; + unsigned long dr; + int i; + + for (i = 0; i < 8; i++) { + /* Ignore db4, db5 */ + if ((i == 4) || (i == 5)) + continue; + get_debugreg(dr, i); + seq_printf(seq, " dr%d\t: %016lx\n", i, dr); + } + + seq_printf(seq, "\n MSR\t:\n"); +} + +static void print_apic(void *arg) +{ + struct seq_file *seq = arg; + +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(seq, " LAPIC\t:\n"); + seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); + seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); + seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); + seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); + seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); + seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); + seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); + seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); + seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); + seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); + seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); + seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); + seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); + seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); + seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); + seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); + seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); + seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); + seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); + seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); + seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); +#endif /* CONFIG_X86_LOCAL_APIC */ + + seq_printf(seq, "\n MSR\t:\n"); +} + +static int cpu_seq_show(struct seq_file *seq, void *v) +{ + struct cpu_private *priv = seq->private; + + if (priv == NULL) + return -EINVAL; + + switch (cpu_base[priv->type].flag) { + case CPU_TSS: + smp_call_function_single(priv->cpu, print_tss, seq, 1); + break; + case CPU_CR: + smp_call_function_single(priv->cpu, print_cr, seq, 1); + break; + case CPU_DT: + smp_call_function_single(priv->cpu, print_dt, seq, 1); + break; + case CPU_DEBUG: + if (priv->file == CPU_INDEX_BIT) + smp_call_function_single(priv->cpu, print_dr, seq, 1); + print_msr(seq, priv->cpu, cpu_base[priv->type].flag); + break; + case CPU_APIC: + if (priv->file == CPU_INDEX_BIT) + smp_call_function_single(priv->cpu, print_apic, seq, 1); + print_msr(seq, priv->cpu, cpu_base[priv->type].flag); + break; + + default: + print_msr(seq, priv->cpu, cpu_base[priv->type].flag); + break; + } + seq_printf(seq, "\n"); + + return 0; +} + +static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos == 0) /* One time is enough ;-) */ + return seq; + + return NULL; +} + +static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + + return cpu_seq_start(seq, pos); +} + +static void cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations cpu_seq_ops = { + .start = cpu_seq_start, + .next = cpu_seq_next, + .stop = cpu_seq_stop, + .show = cpu_seq_show, +}; + +static int cpu_seq_open(struct inode *inode, struct file *file) +{ + struct cpu_private *priv = inode->i_private; + struct seq_file *seq; + int err; + + err = seq_open(file, &cpu_seq_ops); + if (!err) { + seq = file->private_data; + seq->private = priv; + } + + return err; +} + +static const struct file_operations cpu_fops = { + .open = cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, + unsigned file, struct dentry *dentry) +{ + struct cpu_private *priv = NULL; + + /* Already intialized */ + if (file == CPU_INDEX_BIT) + if (per_cpu(cpu_arr[type].init, cpu)) + return 0; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + priv->cpu = cpu; + priv->type = type; + priv->reg = reg; + priv->file = file; + mutex_lock(&cpu_debug_lock); + per_cpu(priv_arr[type], cpu) = priv; + per_cpu(cpu_priv_count, cpu)++; + mutex_unlock(&cpu_debug_lock); + + if (file) + debugfs_create_file(cpu_file[file].name, S_IRUGO, + dentry, (void *)priv, &cpu_fops); + else { + debugfs_create_file(cpu_base[type].name, S_IRUGO, + per_cpu(cpu_arr[type].dentry, cpu), + (void *)priv, &cpu_fops); + mutex_lock(&cpu_debug_lock); + per_cpu(cpu_arr[type].init, cpu) = 1; + mutex_unlock(&cpu_debug_lock); + } + + return 0; +} + +static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, + struct dentry *dentry) +{ + unsigned file; + int err = 0; + + for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { + err = cpu_create_file(cpu, type, reg, file, dentry); + if (err) + return err; + } + + return err; +} + +static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) +{ + struct dentry *cpu_dentry = NULL; + unsigned reg, reg_min, reg_max; + int i, range, err = 0; + char reg_dir[12]; + u32 low, high; + + range = get_cpu_range_count(cpu); + + for (i = 0; i < range; i++) { + if (!get_cpu_range(cpu, ®_min, ®_max, i, + cpu_base[type].flag)) + continue; + + for (reg = reg_min; reg <= reg_max; reg++) { + if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) + continue; + + sprintf(reg_dir, "0x%x", reg); + cpu_dentry = debugfs_create_dir(reg_dir, dentry); + err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); + if (err) + return err; + } + } + + return err; +} + +static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) +{ + struct dentry *cpu_dentry = NULL; + unsigned type; + int err = 0; + + for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { + if (!is_typeflag_valid(cpu, cpu_base[type].flag)) + continue; + cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); + per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; + + if (type < CPU_TSS_BIT) + err = cpu_init_msr(cpu, type, cpu_dentry); + else + err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, + cpu_dentry); + if (err) + return err; + } + + return err; +} + +static int cpu_init_cpu(void) +{ + struct dentry *cpu_dentry = NULL; + struct cpuinfo_x86 *cpui; + char cpu_dir[12]; + unsigned cpu; + int err = 0; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + cpui = &cpu_data(cpu); + if (!cpu_has(cpui, X86_FEATURE_MSR)) + continue; + per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | + (cpui->x86 << 8) | + (cpui->x86_model)); + per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); + + sprintf(cpu_dir, "cpu%d", cpu); + cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); + err = cpu_init_allreg(cpu, cpu_dentry); + + pr_info("cpu%d(%d) debug files %d\n", + cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); + if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { + pr_err("Register files count %d exceeds limit %d\n", + per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); + per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; + err = -ENFILE; + } + if (err) + return err; + } + + return err; +} + +static int __init cpu_debug_init(void) +{ + cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); + + return cpu_init_cpu(); +} + +static void __exit cpu_debug_exit(void) +{ + int i, cpu; + + if (cpu_debugfs_dir) + debugfs_remove_recursive(cpu_debugfs_dir); + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) + kfree(per_cpu(priv_arr[i], cpu)); +} + +module_init(cpu_debug_init); +module_exit(cpu_debug_exit); + +MODULE_AUTHOR("Jaswinder Singh Rajput"); +MODULE_DESCRIPTION("CPU Debug module"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From f24ade3a3332811a512ed3b6c6aa69486719b1d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Mar 2009 19:02:30 +0100 Subject: x86, sched_clock(): mark variables read-mostly Impact: micro-optimization There's a number of variables in the sched_clock() path that are in .data/.bss - but not marked __read_mostly. This creates the danger of accidental false cacheline sharing with some other, write-often variable. So mark them __read_mostly. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e58168631..9c8b71531ca8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -17,20 +17,21 @@ #include #include -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); -unsigned int tsc_khz; + +unsigned int __read_mostly tsc_khz; EXPORT_SYMBOL(tsc_khz); /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ -static int tsc_unstable; +static int __read_mostly tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent erroneous rdtsc usage on !cpu_has_tsc processors */ -static int tsc_disabled = -1; +static int __read_mostly tsc_disabled = -1; static int tsc_clocksource_reliable; /* -- cgit v1.2.3 From 8229d754383e8cd905c38b56bd7365c7fc10dfc1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Mar 2009 19:13:49 +0530 Subject: x86: cpu architecture debug code, build fix, cleanup move store_ldt outside the CONFIG_PARAVIRT section and also clean up the code a bit. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 3 ++- arch/x86/kernel/cpu/cpu_debug.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index dc27705f5443..5623c50d67b2 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr) #define store_gdt(dtr) native_store_gdt(dtr) #define store_idt(dtr) native_store_idt(dtr) #define store_tr(tr) (tr = native_store_tr()) -#define store_ldt(ldt) asm("sldt %0":"=m" (ldt)) #define load_TLS(t, cpu) native_load_tls(t, cpu) #define set_ldt native_set_ldt @@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) } #endif /* CONFIG_PARAVIRT */ +#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) + static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 0bdf4daba205..9abbcbd933cc 100755 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -427,7 +428,7 @@ static void print_tss(void *arg) seq_printf(seq, " CS\t: %04x\n", seg); asm("movl %%ds,%0" : "=r" (seg)); seq_printf(seq, " DS\t: %04x\n", seg); - seq_printf(seq, " SS\t: %04lx\n", regs->ss); + seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); asm("movl %%es,%0" : "=r" (seg)); seq_printf(seq, " ES\t: %04x\n", seg); asm("movl %%fs,%0" : "=r" (seg)); -- cgit v1.2.3 From 9fa7266c2f0cdfcb09eacba2d3624bec7df78641 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 10:34:45 +0000 Subject: x86: remove leftover unwind annotations Impact: cleanup These got left in needlessly when ret_from_fork got simplified. Signed-off-by: Jan Beulich LKML-Reference: <49B8F355.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7ba4621c0dfa..54866bb5d38c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -416,7 +416,6 @@ ENTRY(ret_from_fork) GET_THREAD_INFO(%rcx) - CFI_REMEMBER_STATE RESTORE_REST testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? @@ -428,7 +427,6 @@ ENTRY(ret_from_fork) RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET jmp ret_from_sys_call # go to the SYSRET fastpath - CFI_RESTORE_STATE CFI_ENDPROC END(ret_from_fork) -- cgit v1.2.3 From c2810188c1b810c68139608a207befae0a4f1e69 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 10:38:55 +0000 Subject: x86-64: move save_paranoid into .kprobes.text Impact: mark save_paranoid as non-kprobe-able code This appears to be necessary as the function gets called from kprobes-unsafe exception handling stubs (i.e. which themselves live in .kprobes.text). Signed-off-by: Jan Beulich LKML-Reference: <49B8F44F.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 54866bb5d38c..a331ec38af9e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -368,6 +368,7 @@ ENTRY(save_rest) END(save_rest) /* save complete stack frame */ + .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -396,6 +397,7 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) + .popsection /* * A newly forked process directly context switches into this address. -- cgit v1.2.3 From 02dde8b45c5460794b9052d7c12939fe3eb63c2c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:08:49 +0000 Subject: x86: move various CPU initialization objects into .cpuinit.rodata Impact: debuggability and micro-optimization Putting whatever is possible into the (final) .rodata section increases the likelihood of catching memory corruption bugs early, and reduces false cache line sharing. Signed-off-by: Jan Beulich LKML-Reference: <49B90961.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/centaur.c | 2 +- arch/x86/kernel/cpu/centaur_64.c | 2 +- arch/x86/kernel/cpu/common.c | 20 ++++++++++---------- arch/x86/kernel/cpu/cpu.h | 11 ++++++----- arch/x86/kernel/cpu/cyrix.c | 16 ++++++++-------- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 8 ++++---- arch/x86/kernel/cpu/transmeta.c | 2 +- arch/x86/kernel/cpu/umc.c | 2 +- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- 12 files changed, 36 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 6882a735d9c0..8220ae69849d 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) u32 regs[4]; const struct cpuid_bit *cb; - static const struct cpuid_bit cpuid_bits[] = { + static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, { 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f47df59016c5..7e4a459daa64 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -502,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } #endif -static struct cpu_dev amd_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 89bfdd9cacc6..983e0830f0da 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -468,7 +468,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) return size; } -static struct cpu_dev centaur_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst centaur_cpu_dev = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index a1625f5a1e78..51b09c48c9c7 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -25,7 +25,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); } -static struct cpu_dev centaur_cpu_dev __cpuinitdata = { +static const struct cpu_dev centaur_cpu_dev __cpuinitconst = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f8869978bbb7..54cbe7690f93 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -70,7 +70,7 @@ cpumask_t cpu_sibling_setup_map; #endif /* CONFIG_X86_32 */ -static struct cpu_dev *this_cpu __cpuinitdata; +static const struct cpu_dev *this_cpu __cpuinitdata; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 @@ -274,9 +274,9 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) */ /* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) { - struct cpu_model_info *info; + const struct cpu_model_info *info; if (c->x86_model >= 16) return NULL; /* Range check */ @@ -321,7 +321,7 @@ void switch_to_new_gdt(int cpu) load_percpu_segment(cpu); } -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -340,7 +340,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) #endif } -static struct cpu_dev __cpuinitdata default_cpu = { +static const struct cpu_dev __cpuinitconst default_cpu = { .c_init = default_init, .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, @@ -634,12 +634,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) void __init early_cpu_init(void) { - struct cpu_dev **cdev; + const struct cpu_dev *const *cdev; int count = 0; printk("KERNEL supported cpus:\n"); for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; + const struct cpu_dev *cpudev = *cdev; unsigned int j; if (count >= X86_VENDOR_NUM) @@ -768,7 +768,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { - char *p; + const char *p; p = table_lookup_model(c); if (p) strcpy(c->x86_model_id, p); @@ -847,7 +847,7 @@ struct msr_range { unsigned max; }; -static struct msr_range msr_range_array[] __cpuinitdata = { +static const struct msr_range msr_range_array[] __cpuinitconst = { { 0x00000000, 0x00000418}, { 0xc0000000, 0xc000040b}, { 0xc0010000, 0xc0010142}, @@ -894,7 +894,7 @@ __setup("noclflush", setup_noclflush); void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { - char *vendor = NULL; + const char *vendor = NULL; if (c->x86_vendor < X86_VENDOR_NUM) vendor = this_cpu->c_vendor; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index de4094a39210..9469ecb5aeb8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -5,15 +5,15 @@ struct cpu_model_info { int vendor; int family; - char *model_names[16]; + const char *model_names[16]; }; /* attempt to consolidate cpu attributes */ struct cpu_dev { - char * c_vendor; + const char * c_vendor; /* some have two possibilities for cpuid string */ - char * c_ident[2]; + const char * c_ident[2]; struct cpu_model_info c_models[4]; @@ -25,11 +25,12 @@ struct cpu_dev { }; #define cpu_dev_register(cpu_devX) \ - static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ __attribute__((__section__(".x86_cpu_dev.init"))) = \ &cpu_devX; -extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; +extern const struct cpu_dev *const __x86_cpu_dev_start[], + *const __x86_cpu_dev_end[]; extern void display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index ffd0f5ed071a..593171e967ef 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) */ static unsigned char Cx86_dir0_msb __cpuinitdata = 0; -static char Cx86_model[][9] __cpuinitdata = { +static const char __cpuinitconst Cx86_model[][9] = { "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", "M II ", "Unknown" }; -static char Cx486_name[][5] __cpuinitdata = { +static const char __cpuinitconst Cx486_name[][5] = { "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", "SRx2", "DRx2" }; -static char Cx486S_name[][4] __cpuinitdata = { +static const char __cpuinitconst Cx486S_name[][4] = { "S", "S2", "Se", "S2e" }; -static char Cx486D_name[][4] __cpuinitdata = { +static const char __cpuinitconst Cx486D_name[][4] = { "DX", "DX2", "?", "?", "?", "DX4" }; static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __cpuinitdata = "12??43"; -static char cyrix_model_mult2[] __cpuinitdata = "12233445"; +static const char __cpuinitconst cyrix_model_mult1[] = "12??43"; +static const char __cpuinitconst cyrix_model_mult2[] = "12233445"; /* * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old @@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) } } -static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, .c_early_init = early_init_cyrix, @@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { cpu_dev_register(cyrix_cpu_dev); -static struct cpu_dev nsc_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst nsc_cpu_dev = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 191117f1ad51..968f15129ed8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -410,7 +410,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i } #endif -static struct cpu_dev intel_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst intel_cpu_dev = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 7293508d8f5c..c471eb1a389c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -32,7 +32,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __cpuinitdata = +static const struct _cache_table __cpuinitconst cache_table[] = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -206,15 +206,15 @@ union l3_cache { unsigned val; }; -static unsigned short assocs[] __cpuinitdata = { +static const unsigned short __cpuinitconst assocs[] = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16, [0xa] = 32, [0xb] = 48, [0xc] = 64, [0xf] = 0xffff // ?? }; -static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; -static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; +static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; +static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 52b3fefbd5af..bb62b3e5caad 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_early_init = early_init_transmeta, diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index e777f79e0960..fd2c37bf7acb 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -8,7 +8,7 @@ * so no special init takes place. */ -static struct cpu_dev umc_cpu_dev __cpuinitdata = { +static const struct cpu_dev __cpuinitconst umc_cpu_dev = { .c_vendor = "UMC", .c_ident = { "UMC UMC UMC" }, .c_models = { diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 666e43df51f9..712d15fdc416 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -226,7 +226,7 @@ static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) return 0; } -static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { +static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { { .callback = set_check_enable_amd_mmconf, .ident = "Sun Microsystems Machine", -- cgit v1.2.3 From 7a81d9a7da03d2f27840d659f97ef140d032f609 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:45:15 +0000 Subject: x86: smarten /proc/interrupts output Impact: change /proc/interrupts output ABI With the number of interrupts on large systems growing, assumptions on the width an interrupt number requires when converted to a decimal string turn invalid. Therefore, calculate the maximum number of digits dynamically. Signed-off-by: Jan Beulich LKML-Reference: <49B911EB.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b864341dcc45..b8ac3b6cf776 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -45,16 +45,16 @@ void ack_bad_irq(unsigned int irq) /* * /proc/interrupts printing: */ -static int show_other_interrupts(struct seq_file *p) +static int show_other_interrupts(struct seq_file *p, int prec) { int j; - seq_printf(p, "NMI: "); + seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); seq_printf(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); + seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_printf(p, " Local timer interrupts\n"); @@ -66,40 +66,40 @@ static int show_other_interrupts(struct seq_file *p) seq_printf(p, " Platform interrupts\n"); } #ifdef CONFIG_SMP - seq_printf(p, "RES: "); + seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); + seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); + seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif #ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); + seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); # ifdef CONFIG_X86_64 - seq_printf(p, "THR: "); + seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); # endif #endif #ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "SPU: "); + seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); #endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); + seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif return 0; } @@ -107,19 +107,22 @@ static int show_other_interrupts(struct seq_file *p) int show_interrupts(struct seq_file *p, void *v) { unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j; + int i = *(loff_t *) v, j, prec; struct irqaction *action; struct irq_desc *desc; if (i > nr_irqs) return 0; + for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) + j *= 10; + if (i == nr_irqs) - return show_other_interrupts(p); + return show_other_interrupts(p, prec); /* print header */ if (i == 0) { - seq_printf(p, " "); + seq_printf(p, "%*s", prec + 8, ""); for_each_online_cpu(j) seq_printf(p, "CPU%-8d", j); seq_putc(p, '\n'); @@ -140,7 +143,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!action && !any_count) goto out; - seq_printf(p, "%3d: ", i); + seq_printf(p, "%*d: ", prec, i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else -- cgit v1.2.3 From 13c6c53282d99c82e79b02477efd2c1e30a991ef Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:37:34 +0000 Subject: x86, 32-bit: also use cpuinfo_x86's x86_{phys,virt}_bits members Impact: 32/64-bit consolidation In a first step, this allows fixing phys_addr_valid() for PAE (which until now reported all addresses to be valid). Subsequently, this will also allow simplifying some MTRR handling code. Signed-off-by: Jan Beulich LKML-Reference: <49B9101E.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 12 +++++++++++- arch/x86/kernel/cpu/intel.c | 5 +++++ arch/x86/mm/ioremap.c | 17 ++++++++--------- 4 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e4..bd3406db1d68 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -75,9 +75,9 @@ struct cpuinfo_x86 { #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; -#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 826d5c876278..a95e9480bb9c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -549,13 +549,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_X86_64 if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#ifdef CONFIG_X86_32 + else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) + c->x86_phys_bits = 36; #endif if (c->extended_cpuid_level >= 0x80000007) @@ -602,8 +604,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; @@ -726,9 +732,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_coreid_bits = 0; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; #else c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; + c->x86_phys_bits = 32; + c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 191117f1ad51..ae769471042e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -54,6 +54,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) c->x86_cache_alignment = 128; #endif + /* CPUID workaround for 0F33/0F34 CPU */ + if (c->x86 == 0xF && c->x86_model == 0x3 + && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + c->x86_phys_bits = 36; + /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index aca924a30ee6..83ed74affba9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,13 +22,17 @@ #include #include -#ifdef CONFIG_X86_64 - -static inline int phys_addr_valid(unsigned long addr) +static inline int phys_addr_valid(resource_size_t addr) { - return addr < (1UL << boot_cpu_data.x86_phys_bits); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif } +#ifdef CONFIG_X86_64 + unsigned long __phys_addr(unsigned long x) { if (x >= __START_KERNEL_map) { @@ -65,11 +69,6 @@ EXPORT_SYMBOL(__virt_addr_valid); #else -static inline int phys_addr_valid(unsigned long addr) -{ - return 1; -} - #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { -- cgit v1.2.3 From 9a50156a1c7bfa65315facaffdfbed6513fcfd3b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:41:23 +0000 Subject: x86: properly __init-annotate recent early_printk additions Impact: cleanup, save memory Don't keep code resident that's only needed during startup. Signed-off-by: Jan Beulich LKML-Reference: <49B91103.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 639ad98238a2..335f049d110f 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -250,7 +250,7 @@ static int dbgp_wait_until_complete(void) return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); } -static void dbgp_mdelay(int ms) +static void __init dbgp_mdelay(int ms) { int i; @@ -311,7 +311,7 @@ static void dbgp_set_data(const void *buf, int size) writel(hi, &ehci_debug->data47); } -static void dbgp_get_data(void *buf, int size) +static void __init dbgp_get_data(void *buf, int size) { unsigned char *bytes = buf; u32 lo, hi; @@ -355,7 +355,7 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, return ret; } -static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, +static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, int size) { u32 pids, addr, ctrl; @@ -386,8 +386,8 @@ static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, return ret; } -static int dbgp_control_msg(unsigned devnum, int requesttype, int request, - int value, int index, void *data, int size) +static int __init dbgp_control_msg(unsigned devnum, int requesttype, + int request, int value, int index, void *data, int size) { u32 pids, addr, ctrl; struct usb_ctrlrequest req; @@ -489,7 +489,7 @@ static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) return 0; } -static int ehci_reset_port(int port) +static int __init ehci_reset_port(int port) { u32 portsc; u32 delay_time, delay; @@ -532,7 +532,7 @@ static int ehci_reset_port(int port) return -EBUSY; } -static int ehci_wait_for_port(int port) +static int __init ehci_wait_for_port(int port) { u32 status; int ret, reps; @@ -557,13 +557,13 @@ static inline void dbgp_printk(const char *fmt, ...) { } typedef void (*set_debug_port_t)(int port); -static void default_set_debug_port(int port) +static void __init default_set_debug_port(int port) { } -static set_debug_port_t set_debug_port = default_set_debug_port; +static set_debug_port_t __initdata set_debug_port = default_set_debug_port; -static void nvidia_set_debug_port(int port) +static void __init nvidia_set_debug_port(int port) { u32 dword; dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, -- cgit v1.2.3 From 82034d6f59b4772f4233bbb61c670290803a9960 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 12:57:10 +0000 Subject: x86: clean up output resulting from update_mptable option Impact: cleanup Without apic=verbose, using the update_mptable option would result in garbled and confusing output due to the inconsistent use of printk() vs apic_printk(). Signed-off-by: Jan Beulich LKML-Reference: <49B914B6.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e8192401da47..47673e02ae58 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -890,12 +890,12 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, #ifdef CONFIG_X86_IO_APIC struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; - printk(KERN_INFO "OLD "); + apic_printk(APIC_VERBOSE, "OLD "); print_MP_intsrc_info(m); i = get_MP_intsrc_index(m); if (i > 0) { assign_to_mpc_intsrc(&mp_irqs[i], m); - printk(KERN_INFO "NEW "); + apic_printk(APIC_VERBOSE, "NEW "); print_mp_irq_info(&mp_irqs[i]); } else if (!i) { /* legacy, do nothing */ @@ -943,7 +943,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, continue; if (nr_m_spare > 0) { - printk(KERN_INFO "*NEW* found "); + apic_printk(APIC_VERBOSE, "*NEW* found\n"); nr_m_spare--; assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); m_spare[nr_m_spare] = NULL; -- cgit v1.2.3 From 5c0e6f035df983210e4d22213aed624ced502d3d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 12 Mar 2009 13:07:23 +0000 Subject: x86: fix code paths used by update_mptable Impact: fix crashes under Xen due to unrobust e820 code find_e820_area_size() must return a properly distinguishable and out-of-bounds value when it fails, and -1UL does not meet that criteria on i386/PAE. Additionally, callers of the function must check against that value. early_reserve_e820() should be prepared for the region found to be outside of the addressable range on 32-bits. e820_update_range_map() should not blindly update e820, but should do all it work on the map it got a pointer passed for (which in 50% of the cases is &e820_saved). It must also not call e820_add_region(), as that again acts on e820 unconditionally. The issues were found when trying to make this option work in our Xen kernel (i.e. where some of the silent assumptions made in the code would not hold). Signed-off-by: Jan Beulich LKML-Reference: <49B9171B.76E4.0078.0@novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/check.c | 2 +- arch/x86/kernel/e820.c | 32 +++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 2ac0ab71412a..b617b1164f1e 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -83,7 +83,7 @@ void __init setup_bios_corruption_check(void) u64 size; addr = find_e820_area_size(addr, &size, PAGE_SIZE); - if (addr == 0) + if (!(addr + 1)) break; if ((addr + size) > corruption_check_size) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 508bec1cee27..3cf6681ac80d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -421,7 +421,7 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, u64 size, unsigned old_type, unsigned new_type) { - int i; + unsigned int i, x; u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -429,7 +429,7 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, if (size > (ULLONG_MAX - start)) size = ULLONG_MAX - start; - for (i = 0; i < e820.nr_map; i++) { + for (i = 0; i < e820x->nr_map; i++) { struct e820entry *ei = &e820x->map[i]; u64 final_start, final_end; if (ei->type != old_type) @@ -446,14 +446,23 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, final_end = min(start + size, ei->addr + ei->size); if (final_start >= final_end) continue; - e820_add_region(final_start, final_end - final_start, - new_type); + + x = e820x->nr_map; + if (x == ARRAY_SIZE(e820x->map)) { + printk(KERN_ERR "Too many memory map entries!\n"); + break; + } + e820x->map[x].addr = final_start; + e820x->map[x].size = final_end - final_start; + e820x->map[x].type = new_type; + e820x->nr_map++; + real_updated_size += final_end - final_start; - ei->size -= final_end - final_start; if (ei->addr < final_start) continue; ei->addr = final_end; + ei->size -= final_end - final_start; } return real_updated_size; } @@ -1020,8 +1029,8 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) continue; return addr; } - return -1UL; + return -1ULL; } /* @@ -1034,13 +1043,22 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) u64 start; start = startt; - while (size < sizet) + while (size < sizet && (start + 1)) start = find_e820_area_size(start, &size, align); if (size < sizet) return 0; +#ifdef CONFIG_X86_32 + if (start >= MAXMEM) + return 0; + if (start + size > MAXMEM) + size = MAXMEM - start; +#endif + addr = round_down(start + size - sizet, align); + if (addr < start) + return 0; e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); printk(KERN_INFO "update e820 for early_reserve_e820\n"); -- cgit v1.2.3 From 8ad9790588ee2e69118b2b294ddab6f3f0379ad9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 Mar 2009 18:43:54 -0700 Subject: x86: more MTRR debug printouts Impact: improve MTRR debugging messages There's still inefficiencies suspected with the MTRR sanitizing code, so make sure we get all the info we need from a dmesg. - Remove unneeded mtrr_show (It will only printout one time by first cpu, so it is no big deal.) - Also print out directly from get_mtrr, because it doesn't update mtrr_state. Signed-off-by: Yinghai Lu LKML-Reference: <49B9BA5A.40108@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 95 ++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 44 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0c0a455fe95c..964403520100 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -33,14 +33,6 @@ u64 mtrr_tom2; struct mtrr_state_type mtrr_state = {}; EXPORT_SYMBOL_GPL(mtrr_state); -static int __initdata mtrr_show; -static int __init mtrr_debug(char *opt) -{ - mtrr_show = 1; - return 0; -} -early_param("mtrr.show", mtrr_debug); - /* * Returns the effective MTRR type for the region * Error returns: @@ -193,13 +185,51 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) unsigned i; for (i = 0; i < 8; ++i, ++types, base += step) - printk(KERN_INFO "MTRR %05X-%05X %s\n", + printk(KERN_INFO " %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types)); } static void prepare_set(void); static void post_set(void); +static void __init print_mtrr_state(void) +{ + unsigned int i; + int high_width; + + printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); + if (mtrr_state.have_fixed) { + printk(KERN_INFO "MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? "en" : "dis"); + print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); + for (i = 0; i < 2; ++i) + print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + for (i = 0; i < 8; ++i) + print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + } + printk(KERN_INFO "MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); + high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { + if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) + printk(KERN_INFO " %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + else + printk(KERN_INFO " %u disabled\n", i); + } + if (mtrr_tom2) { + printk(KERN_INFO "TOM2: %016llx aka %lldM\n", + mtrr_tom2, mtrr_tom2>>20); + } +} + /* Grab all of the MTRR state for this CPU into *state */ void __init get_mtrr_state(void) { @@ -231,41 +261,9 @@ void __init get_mtrr_state(void) mtrr_tom2 |= low; mtrr_tom2 &= 0xffffff800000ULL; } - if (mtrr_show) { - int high_width; - - printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); - if (mtrr_state.have_fixed) { - printk(KERN_INFO "MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); - print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); - for (i = 0; i < 2; ++i) - print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); - for (i = 0; i < 8; ++i) - print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); - } - printk(KERN_INFO "MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? "en" : "dis"); - high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; - for (i = 0; i < num_var_ranges; ++i) { - if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); - else - printk(KERN_INFO "MTRR %u disabled\n", i); - } - if (mtrr_tom2) { - printk(KERN_INFO "TOM2: %016llx aka %lldM\n", - mtrr_tom2, mtrr_tom2>>20); - } - } + + print_mtrr_state(); + mtrr_state_set = 1; /* PAT setup for BP. We need to go through sync steps here */ @@ -377,7 +375,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned int mask_lo, mask_hi, base_lo, base_hi; unsigned int tmp, hi; + /* + * get_mtrr doesn't need to update mtrr_state, also it could be called + * from any cpu, so try to print it out directly. + */ rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); + if ((mask_lo & 0x800) == 0) { /* Invalid (i.e. free) range */ *base = 0; @@ -407,6 +410,10 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, *size = -mask_lo; *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; *type = base_lo & 0xff; + + printk(KERN_DEBUG " get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n", + smp_processor_id(), reg, *base, *size, + mtrr_attrib_to_str(*type & 0xff)); } /** -- cgit v1.2.3 From c1ab7e93c6ddf8a068719b97b7e26c3d8eba7c32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Mar 2009 20:05:46 -0700 Subject: x86: print out mtrr_range_state when user specify size Impact: print more debug info Keep it consistent with autodetect version. Signed-off-by: Yinghai Lu LKML-Reference: <49B87C0A.4010105@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 236a401b8259..311a4734609b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1424,6 +1424,8 @@ static int __init mtrr_cleanup(unsigned address_bits) if (!result[i].bad) { set_var_mtrr_all(address_bits); + printk(KERN_DEBUG "New variable MTRRs\n"); + print_out_mtrr_range_state(); return 1; } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " -- cgit v1.2.3 From 0d890355bff25e1dc03a577a90ed80741489ca54 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Mar 2009 20:07:39 -0700 Subject: x86: separate mtrr cleanup/mtrr_e820 trim to separate file Impact: cleanup mtrr main.c is too big, seperate mtrr cleanup and mtrr e820 trim code to another file. Signed-off-by: Yinghai Lu LKML-Reference: <49B87C7B.80809@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/Makefile | 2 +- arch/x86/kernel/cpu/mtrr/cleanup.c | 1089 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/main.c | 1055 +--------------------------------- arch/x86/kernel/cpu/mtrr/mtrr.h | 3 + 4 files changed, 1094 insertions(+), 1055 deletions(-) create mode 100644 arch/x86/kernel/cpu/mtrr/cleanup.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index 191fc0533649..f4361b56f8e9 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -1,3 +1,3 @@ -obj-y := main.o if.o generic.o state.o +obj-y := main.o if.o generic.o state.o cleanup.o obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c new file mode 100644 index 000000000000..58b58bbf7eb3 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -0,0 +1,1089 @@ +/* MTRR (Memory Type Range Register) cleanup + + Copyright (C) 2009 Yinghai Lu + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + +/* should be related to MTRR_VAR_RANGES nums */ +#define RANGE_NUM 256 + +struct res_range { + unsigned long start; + unsigned long end; +}; + +static int __init +add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + /* out of slots */ + if (nr_range >= RANGE_NUM) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +static int __init +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + int i; + + /* try to merge it with old one */ + for (i = 0; i < nr_range; i++) { + unsigned long final_start, final_end; + unsigned long common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + + /* need to add that */ + return add_range(range, nr_range, start, end); +} + +static void __init +subtract_range(struct res_range *range, unsigned long start, unsigned long end) +{ + int i, j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int __init cmp_range(const void *x1, const void *x2) +{ + const struct res_range *r1 = x1; + const struct res_range *r2 = x2; + long start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static int __initdata debug_print; + +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + nr_range = add_range_with_merge(range, nr_range, base, + base + size - 1); + } + if (debug_print) { + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_UNCACHABLE && + type != MTRR_TYPE_WRPROT) + continue; + size = range_state[i].size_pfn; + if (!size) + continue; + base = range_state[i].base_pfn; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, + extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + if (debug_print) { + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + if (debug_print) { + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } + + /* clear those is not used */ + for (i = nr_range; i < RANGE_NUM; i++) + memset(&range[i], 0, sizeof(range[i])); + + return nr_range; +} + +static struct res_range __initdata range[RANGE_NUM]; +static int __initdata nr_range; + +#ifdef CONFIG_MTRR_SANITIZER + +static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +{ + unsigned long sum; + int i; + + sum = 0; + for (i = 0; i < nr_range; i++) + sum += range[i].end + 1 - range[i].start; + + return sum; +} + +static int enable_mtrr_cleanup __initdata = + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); + +static int __init mtrr_cleanup_debug_setup(char *str) +{ + debug_print = 1; + return 0; +} +early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); + +struct var_mtrr_state { + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; +}; + +static void __init +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned int address_bits) +{ + u32 base_lo, base_hi, mask_lo, mask_hi; + u64 base, mask; + + if (!sizek) { + fill_mtrr_var_range(reg, 0, 0, 0, 0); + return; + } + + mask = (1ULL << address_bits) - 1; + mask &= ~((((u64)sizek) << 10) - 1); + + base = ((u64)basek) << 10; + + base |= type; + mask |= 0x800; + + base_lo = base & ((1ULL<<32) - 1); + base_hi = base >> 32; + + mask_lo = mask & ((1ULL<<32) - 1); + mask_hi = mask >> 32; + + fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +} + +static void __init +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type) +{ + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); + range_state[reg].type = type; +} + +static void __init +set_var_mtrr_all(unsigned int address_bits) +{ + unsigned long basek, sizek; + unsigned char type; + unsigned int reg; + + for (reg = 0; reg < num_var_ranges; reg++) { + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); + type = range_state[reg].type; + + set_var_mtrr(reg, basek, sizek, type, address_bits); + } +} + +static unsigned long to_size_factor(unsigned long sizek, char *factorp) +{ + char factor; + unsigned long base = sizek; + + if (base & ((1<<10) - 1)) { + /* not MB alignment */ + factor = 'K'; + } else if (base & ((1<<20) - 1)) { + factor = 'M'; + base >>= 10; + } else { + factor = 'G'; + base >>= 20; + } + + *factorp = factor; + + return base; +} + +static unsigned int __init +range_to_mtrr(unsigned int reg, unsigned long range_startk, + unsigned long range_sizek, unsigned char type) +{ + if (!range_sizek || (reg >= num_var_ranges)) + return reg; + + while (range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + + /* Compute the maximum size I can make a range */ + if (range_startk) + max_align = ffs(range_startk) - 1; + else + max_align = 32; + align = fls(range_sizek) - 1; + if (align > max_align) + align = max_align; + + sizek = 1 << align; + if (debug_print) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + start_base = to_size_factor(range_startk, + &start_factor), + size_base = to_size_factor(sizek, &size_factor), + + printk(KERN_DEBUG "Setting variable MTRR %d, " + "base: %ld%cB, range: %ld%cB, type %s\n", + reg, start_base, start_factor, + size_base, size_factor, + (type == MTRR_TYPE_UNCACHABLE) ? "UC" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ); + } + save_var_mtrr(reg++, range_startk, sizek, type); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= num_var_ranges) + break; + } + return reg; +} + +static unsigned __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, + unsigned long sizek) +{ + unsigned long hole_basek, hole_sizek; + unsigned long second_basek, second_sizek; + unsigned long range0_basek, range0_sizek; + unsigned long range_basek, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + + hole_basek = 0; + hole_sizek = 0; + second_basek = 0; + second_sizek = 0; + chunk_sizek = state->chunk_sizek; + gran_sizek = state->gran_sizek; + + /* align with gran size, prevent small block used up MTRRs */ + range_basek = ALIGN(state->range_startk, gran_sizek); + if ((range_basek > basek) && basek) + return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); + range_sizek = ALIGN(state->range_sizek, gran_sizek); + + while (range_sizek > state->range_sizek) { + range_sizek -= gran_sizek; + if (!range_sizek) + return 0; + } + state->range_sizek = range_sizek; + + /* try to append some small hole */ + range0_basek = state->range_startk; + range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + + /* no increase */ + if (range0_sizek == state->range_sizek) { + if (debug_print) + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK); + return 0; + } + + /* only cut back, when it is not the last */ + if (sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + if (range0_sizek >= chunk_sizek) + range0_sizek -= chunk_sizek; + else + range0_sizek = 0; + + if (!range0_sizek) + break; + } + } + +second_try: + range_basek = range0_basek + range0_sizek; + + /* one hole in the middle */ + if (range_basek > basek && range_basek <= (basek + sizek)) + second_sizek = range_basek - basek; + + if (range0_sizek > state->range_sizek) { + + /* one hole in middle or at end */ + hole_sizek = range0_sizek - state->range_sizek - second_sizek; + + /* hole size should be less than half of range0 size */ + if (hole_sizek >= (range0_sizek >> 1) && + range0_sizek >= chunk_sizek) { + range0_sizek -= chunk_sizek; + second_sizek = 0; + hole_sizek = 0; + + goto second_try; + } + } + + if (range0_sizek) { + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); + } + + if (range0_sizek < state->range_sizek) { + /* need to handle left over */ + range_sizek = state->range_sizek - range0_sizek; + + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); + } + + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE); + } + + return second_sizek; +} + +static void __init +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, + unsigned long size_pfn) +{ + unsigned long basek, sizek; + unsigned long second_sizek = 0; + + if (state->reg >= num_var_ranges) + return; + + basek = base_pfn << (PAGE_SHIFT - 10); + sizek = size_pfn << (PAGE_SHIFT - 10); + + /* See if I can merge with the last range */ + if ((basek <= 1024) || + (state->range_startk + state->range_sizek == basek)) { + unsigned long endk = basek + sizek; + state->range_sizek = endk - state->range_startk; + return; + } + /* Write the range mtrrs */ + if (state->range_sizek != 0) + second_sizek = range_to_mtrr_with_hole(state, basek, sizek); + + /* Allocate an msr */ + state->range_startk = basek + second_sizek; + state->range_sizek = sizek - second_sizek; +} + +/* mininum size of mtrr block that can take hole */ +static u64 mtrr_chunk_size __initdata = (256ULL<<20); + +static int __init parse_mtrr_chunk_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_chunk_size = memparse(p, &p); + return 0; +} +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); + +/* granity of mtrr of block */ +static u64 mtrr_gran_size __initdata; + +static int __init parse_mtrr_gran_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_gran_size = memparse(p, &p); + return 0; +} +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); + +static int nr_mtrr_spare_reg __initdata = + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; + +static int __init parse_mtrr_spare_reg(char *arg) +{ + if (arg) + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); + +static int __init +x86_setup_var_mtrrs(struct res_range *range, int nr_range, + u64 chunk_size, u64 gran_size) +{ + struct var_mtrr_state var_state; + int i; + int num_reg; + + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.chunk_sizek = chunk_size >> 10; + var_state.gran_sizek = gran_size >> 10; + + memset(range_state, 0, sizeof(range_state)); + + /* Write the range etc */ + for (i = 0; i < nr_range; i++) + set_var_mtrr_range(&var_state, range[i].start, + range[i].end - range[i].start + 1); + + /* Write the last range */ + if (var_state.range_sizek != 0) + range_to_mtrr_with_hole(&var_state, 0, 0); + + num_reg = var_state.reg; + /* Clear out the extra MTRR's */ + while (var_state.reg < num_var_ranges) { + save_var_mtrr(var_state.reg, 0, 0, 0); + var_state.reg++; + } + + return num_reg; +} + +struct mtrr_cleanup_result { + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; +}; + +/* + * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 2G + * so we need (1+16)*8 + */ +#define NUM_RESULT 136 +#define PSHIFT (PAGE_SHIFT - 10) + +static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +static unsigned long __initdata min_loss_pfn[RANGE_NUM]; + +static void __init print_out_mtrr_range_state(void) +{ + int i; + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + + size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); + if (!size_base) + continue; + + size_base = to_size_factor(size_base, &size_factor), + start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); + start_base = to_size_factor(start_base, &start_factor), + type = range_state[i].type; + + printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + i, start_base, start_factor, + size_base, size_factor, + (type == MTRR_TYPE_UNCACHABLE) ? "UC" : + ((type == MTRR_TYPE_WRPROT) ? "WP" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) + ); + } +} + +static int __init mtrr_need_cleanup(void) +{ + int i; + mtrr_type type; + unsigned long size; + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + size = range_state[i].size_pfn; + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + if (type == MTRR_TYPE_WRPROT) + type = MTRR_TYPE_UNCACHABLE; + num[type]++; + } + + /* check if we got UC entries */ + if (!num[MTRR_TYPE_UNCACHABLE]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + return 1; +} + +static unsigned long __initdata range_sums; +static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, + unsigned long extra_remove_base, + unsigned long extra_remove_size, + int i) +{ + int num_reg; + static struct res_range range_new[RANGE_NUM]; + static int nr_range_new; + unsigned long range_sums_new; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, + chunk_size, gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + result[i].chunk_sizek = chunk_size >> 10; + result[i].gran_sizek = gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + /* double check it */ + if (!result[i].bad && !result[i].lose_cover_sizek) { + if (nr_range_new != nr_range || + memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; + } + + if (!result[i].bad && (range_sums - range_sums_new < + min_loss_pfn[num_reg])) { + min_loss_pfn[num_reg] = + range_sums - range_sums_new; + } +} + +static void __init mtrr_print_out_one_result(int i) +{ + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad ? "*BAD*" : " ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad ? "-" : "", + lose_base, lose_factor); +} + +static int __init mtrr_search_optimal_index(void) +{ + int i; + int num_reg_good; + int index_good; + + if (nr_mtrr_spare_reg >= num_var_ranges) + nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; + for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { + if (!min_loss_pfn[i]) + num_reg_good = i; + } + + index_good = -1; + if (num_reg_good != -1) { + for (i = 0; i < NUM_RESULT; i++) { + if (!result[i].bad && + result[i].num_reg == num_reg_good && + !result[i].lose_cover_sizek) { + index_good = i; + break; + } + } + } + + return index_good; +} + + +int __init mtrr_cleanup(unsigned address_bits) +{ + unsigned long extra_remove_base, extra_remove_size; + unsigned long base, size, def, dummy; + mtrr_type type; + u64 chunk_size, gran_size; + int index_good; + int i; + + if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* check if we need handle it and can handle it */ + if (!mtrr_need_cleanup()) + return 0; + + /* print original var MTRRs at first, for debugging: */ + printk(KERN_DEBUG "original variable MTRRs\n"); + print_out_mtrr_range_state(); + + memset(range, 0, sizeof(range)); + extra_remove_size = 0; + extra_remove_base = 1 << (32 - PAGE_SHIFT); + if (mtrr_tom2) + extra_remove_size = + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, + extra_remove_size); + /* + * [0, 1M) should always be coverred by var mtrr with WB + * and fixed mtrrs should take effective before var mtrr for it + */ + nr_range = add_range_with_merge(range, nr_range, 0, + (1ULL<<(20 - PAGE_SHIFT)) - 1); + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + + range_sums = sum_ranges(range, nr_range); + printk(KERN_INFO "total RAM coverred: %ldM\n", + range_sums >> (20 - PAGE_SHIFT)); + + if (mtrr_chunk_size && mtrr_gran_size) { + i = 0; + mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, + extra_remove_base, extra_remove_size, i); + + mtrr_print_out_one_result(i); + + if (!result[i].bad) { + set_var_mtrr_all(address_bits); + printk(KERN_DEBUG "New variable MTRRs\n"); + print_out_mtrr_range_state(); + return 1; + } + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " + "will find optimal one\n"); + } + + i = 0; + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); + memset(result, 0, sizeof(result)); + for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { + + for (chunk_size = gran_size; chunk_size < (1ULL<<32); + chunk_size <<= 1) { + + if (i >= NUM_RESULT) + continue; + + mtrr_calc_range_state(chunk_size, gran_size, + extra_remove_base, extra_remove_size, i); + if (debug_print) { + mtrr_print_out_one_result(i); + printk(KERN_INFO "\n"); + } + + i++; + } + } + + /* try to find the optimal index */ + index_good = mtrr_search_optimal_index(); + + if (index_good != -1) { + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + i = index_good; + mtrr_print_out_one_result(i); + + /* convert ranges to var ranges state */ + chunk_size = result[i].chunk_sizek; + chunk_size <<= 10; + gran_size = result[i].gran_sizek; + gran_size <<= 10; + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + set_var_mtrr_all(address_bits); + printk(KERN_DEBUG "New variable MTRRs\n"); + print_out_mtrr_range_state(); + return 1; + } else { + /* print out all */ + for (i = 0; i < NUM_RESULT; i++) + mtrr_print_out_one_result(i); + } + + printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + + return 0; +} +#else +int __init mtrr_cleanup(unsigned address_bits) +{ + return 0; +} +#endif + +static int disable_mtrr_trim; + +static int __init disable_mtrr_trim_setup(char *str) +{ + disable_mtrr_trim = 1; + return 0; +} +early_param("disable_mtrr_trim", disable_mtrr_trim_setup); + +/* + * Newer AMD K8s and later CPUs have a special magic MSR way to force WB + * for memory >4GB. Check for that here. + * Note this won't check if the MTRRs < 4GB where the magic bit doesn't + * apply to are wrong, but so far we don't know of any such case in the wild. + */ +#define Tom2Enabled (1U << 21) +#define Tom2ForceMemTypeWB (1U << 22) + +int __init amd_special_default_mtrr(void) +{ + u32 l, h; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return 0; + if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) + return 0; + /* In case some hypervisor doesn't pass SYSCFG through */ + if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + return 0; + /* + * Memory between 4GB and top of mem is forced WB by this magic bit. + * Reserved before K8RevF, but should be zero there. + */ + if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == + (Tom2Enabled | Tom2ForceMemTypeWB)) + return 1; + return 0; +} + +static u64 __init real_trim_memory(unsigned long start_pfn, + unsigned long limit_pfn) +{ + u64 trim_start, trim_size; + trim_start = start_pfn; + trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + + return e820_update_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); +} +/** + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs + * @end_pfn: ending page frame number + * + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain + * memory configurations. This routine checks that the highest MTRR matches + * the end of memory, to make sure the MTRRs having a write back type cover + * all of the memory the kernel is intending to use. If not, it'll trim any + * memory off the end by adjusting end_pfn, removing it from the kernel's + * allocation pools, warning the user with an obnoxious message. + */ +int __init mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + unsigned long i, base, size, highest_pfn = 0, def, dummy; + mtrr_type type; + u64 total_trim_size; + + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + /* + * Make sure we only trim uncachable memory on machines that + * support the Intel MTRR architecture: + */ + if (!is_cpu(INTEL) || disable_mtrr_trim) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* Find highest cached pfn */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + if (highest_pfn < base + size) + highest_pfn = base + size; + } + + /* kvm/qemu doesn't have mtrr set right, don't trim them all */ + if (!highest_pfn) { + printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); + return 0; + } + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type >= MTRR_NUM_TYPES) + continue; + size = range_state[i].size_pfn; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* no entry for WB? */ + if (!num[MTRR_TYPE_WRBACK]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + nr_range = 0; + if (mtrr_tom2) { + range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); + range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; + if (highest_pfn < range[nr_range].end + 1) + highest_pfn = range[nr_range].end + 1; + nr_range++; + } + nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + + total_trim_size = 0; + /* check the head */ + if (range[0].start) + total_trim_size += real_trim_memory(0, range[0].start); + /* check the holes */ + for (i = 0; i < nr_range - 1; i++) { + if (range[i].end + 1 < range[i+1].start) + total_trim_size += real_trim_memory(range[i].end + 1, + range[i+1].start); + } + /* check the top */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) + total_trim_size += real_trim_memory(range[i].end + 1, + end_pfn); + + if (total_trim_size) { + printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" + " all of memory, losing %lluMB of RAM.\n", + total_trim_size >> 20); + + if (!changed_by_mtrr_cleanup) + WARN_ON(1); + + printk(KERN_INFO "update e820 for mtrr\n"); + update_e820(); + + return 1; + } + + return 0; +} + diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 311a4734609b..5c2e266f41d7 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -610,1060 +610,7 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; -/* should be related to MTRR_VAR_RANGES nums */ -#define RANGE_NUM 256 - -struct res_range { - unsigned long start; - unsigned long end; -}; - -static int __init -add_range(struct res_range *range, int nr_range, unsigned long start, - unsigned long end) -{ - /* out of slots */ - if (nr_range >= RANGE_NUM) - return nr_range; - - range[nr_range].start = start; - range[nr_range].end = end; - - nr_range++; - - return nr_range; -} - -static int __init -add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, - unsigned long end) -{ - int i; - - /* try to merge it with old one */ - for (i = 0; i < nr_range; i++) { - unsigned long final_start, final_end; - unsigned long common_start, common_end; - - if (!range[i].end) - continue; - - common_start = max(range[i].start, start); - common_end = min(range[i].end, end); - if (common_start > common_end + 1) - continue; - - final_start = min(range[i].start, start); - final_end = max(range[i].end, end); - - range[i].start = final_start; - range[i].end = final_end; - return nr_range; - } - - /* need to add that */ - return add_range(range, nr_range, start, end); -} - -static void __init -subtract_range(struct res_range *range, unsigned long start, unsigned long end) -{ - int i, j; - - for (j = 0; j < RANGE_NUM; j++) { - if (!range[j].end) - continue; - - if (start <= range[j].start && end >= range[j].end) { - range[j].start = 0; - range[j].end = 0; - continue; - } - - if (start <= range[j].start && end < range[j].end && - range[j].start < end + 1) { - range[j].start = end + 1; - continue; - } - - - if (start > range[j].start && end >= range[j].end && - range[j].end > start - 1) { - range[j].end = start - 1; - continue; - } - - if (start > range[j].start && end < range[j].end) { - /* find the new spare */ - for (i = 0; i < RANGE_NUM; i++) { - if (range[i].end == 0) - break; - } - if (i < RANGE_NUM) { - range[i].end = range[j].end; - range[i].start = end + 1; - } else { - printk(KERN_ERR "run of slot in ranges\n"); - } - range[j].end = start - 1; - continue; - } - } -} - -static int __init cmp_range(const void *x1, const void *x2) -{ - const struct res_range *r1 = x1; - const struct res_range *r2 = x2; - long start1, start2; - - start1 = r1->start; - start2 = r2->start; - - return start1 - start2; -} - -struct var_mtrr_range_state { - unsigned long base_pfn; - unsigned long size_pfn; - mtrr_type type; -}; - -static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; -static int __initdata debug_print; - -static int __init -x86_get_mtrr_mem_range(struct res_range *range, int nr_range, - unsigned long extra_remove_base, - unsigned long extra_remove_size) -{ - unsigned long i, base, size; - mtrr_type type; - - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type != MTRR_TYPE_WRBACK) - continue; - base = range_state[i].base_pfn; - size = range_state[i].size_pfn; - nr_range = add_range_with_merge(range, nr_range, base, - base + size - 1); - } - if (debug_print) { - printk(KERN_DEBUG "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", - range[i].start, range[i].end + 1); - } - - /* take out UC ranges */ - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type != MTRR_TYPE_UNCACHABLE && - type != MTRR_TYPE_WRPROT) - continue; - size = range_state[i].size_pfn; - if (!size) - continue; - base = range_state[i].base_pfn; - subtract_range(range, base, base + size - 1); - } - if (extra_remove_size) - subtract_range(range, extra_remove_base, - extra_remove_base + extra_remove_size - 1); - - /* get new range num */ - nr_range = 0; - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - nr_range++; - } - if (debug_print) { - printk(KERN_DEBUG "After UC checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", - range[i].start, range[i].end + 1); - } - - /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - if (debug_print) { - printk(KERN_DEBUG "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", - range[i].start, range[i].end + 1); - } - - /* clear those is not used */ - for (i = nr_range; i < RANGE_NUM; i++) - memset(&range[i], 0, sizeof(range[i])); - - return nr_range; -} - -static struct res_range __initdata range[RANGE_NUM]; -static int __initdata nr_range; - -#ifdef CONFIG_MTRR_SANITIZER - -static unsigned long __init sum_ranges(struct res_range *range, int nr_range) -{ - unsigned long sum; - int i; - - sum = 0; - for (i = 0; i < nr_range; i++) - sum += range[i].end + 1 - range[i].start; - - return sum; -} - -static int enable_mtrr_cleanup __initdata = - CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; - -static int __init disable_mtrr_cleanup_setup(char *str) -{ - enable_mtrr_cleanup = 0; - return 0; -} -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); - -static int __init enable_mtrr_cleanup_setup(char *str) -{ - enable_mtrr_cleanup = 1; - return 0; -} -early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); - -static int __init mtrr_cleanup_debug_setup(char *str) -{ - debug_print = 1; - return 0; -} -early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); - -struct var_mtrr_state { - unsigned long range_startk; - unsigned long range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; -}; - -static void __init -set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned int address_bits) -{ - u32 base_lo, base_hi, mask_lo, mask_hi; - u64 base, mask; - - if (!sizek) { - fill_mtrr_var_range(reg, 0, 0, 0, 0); - return; - } - - mask = (1ULL << address_bits) - 1; - mask &= ~((((u64)sizek) << 10) - 1); - - base = ((u64)basek) << 10; - - base |= type; - mask |= 0x800; - - base_lo = base & ((1ULL<<32) - 1); - base_hi = base >> 32; - - mask_lo = mask & ((1ULL<<32) - 1); - mask_hi = mask >> 32; - - fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); -} - -static void __init -save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type) -{ - range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); - range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); - range_state[reg].type = type; -} - -static void __init -set_var_mtrr_all(unsigned int address_bits) -{ - unsigned long basek, sizek; - unsigned char type; - unsigned int reg; - - for (reg = 0; reg < num_var_ranges; reg++) { - basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); - sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); - type = range_state[reg].type; - - set_var_mtrr(reg, basek, sizek, type, address_bits); - } -} - -static unsigned long to_size_factor(unsigned long sizek, char *factorp) -{ - char factor; - unsigned long base = sizek; - - if (base & ((1<<10) - 1)) { - /* not MB alignment */ - factor = 'K'; - } else if (base & ((1<<20) - 1)){ - factor = 'M'; - base >>= 10; - } else { - factor = 'G'; - base >>= 20; - } - - *factorp = factor; - - return base; -} - -static unsigned int __init -range_to_mtrr(unsigned int reg, unsigned long range_startk, - unsigned long range_sizek, unsigned char type) -{ - if (!range_sizek || (reg >= num_var_ranges)) - return reg; - - while (range_sizek) { - unsigned long max_align, align; - unsigned long sizek; - - /* Compute the maximum size I can make a range */ - if (range_startk) - max_align = ffs(range_startk) - 1; - else - max_align = 32; - align = fls(range_sizek) - 1; - if (align > max_align) - align = max_align; - - sizek = 1 << align; - if (debug_print) { - char start_factor = 'K', size_factor = 'K'; - unsigned long start_base, size_base; - - start_base = to_size_factor(range_startk, &start_factor), - size_base = to_size_factor(sizek, &size_factor), - - printk(KERN_DEBUG "Setting variable MTRR %d, " - "base: %ld%cB, range: %ld%cB, type %s\n", - reg, start_base, start_factor, - size_base, size_factor, - (type == MTRR_TYPE_UNCACHABLE)?"UC": - ((type == MTRR_TYPE_WRBACK)?"WB":"Other") - ); - } - save_var_mtrr(reg++, range_startk, sizek, type); - range_startk += sizek; - range_sizek -= sizek; - if (reg >= num_var_ranges) - break; - } - return reg; -} - -static unsigned __init -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, - unsigned long sizek) -{ - unsigned long hole_basek, hole_sizek; - unsigned long second_basek, second_sizek; - unsigned long range0_basek, range0_sizek; - unsigned long range_basek, range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - - hole_basek = 0; - hole_sizek = 0; - second_basek = 0; - second_sizek = 0; - chunk_sizek = state->chunk_sizek; - gran_sizek = state->gran_sizek; - - /* align with gran size, prevent small block used up MTRRs */ - range_basek = ALIGN(state->range_startk, gran_sizek); - if ((range_basek > basek) && basek) - return second_sizek; - state->range_sizek -= (range_basek - state->range_startk); - range_sizek = ALIGN(state->range_sizek, gran_sizek); - - while (range_sizek > state->range_sizek) { - range_sizek -= gran_sizek; - if (!range_sizek) - return 0; - } - state->range_sizek = range_sizek; - - /* try to append some small hole */ - range0_basek = state->range_startk; - range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - - /* no increase */ - if (range0_sizek == state->range_sizek) { - if (debug_print) - printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + state->range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - state->range_sizek, MTRR_TYPE_WRBACK); - return 0; - } - - /* only cut back, when it is not the last */ - if (sizek) { - while (range0_basek + range0_sizek > (basek + sizek)) { - if (range0_sizek >= chunk_sizek) - range0_sizek -= chunk_sizek; - else - range0_sizek = 0; - - if (!range0_sizek) - break; - } - } - -second_try: - range_basek = range0_basek + range0_sizek; - - /* one hole in the middle */ - if (range_basek > basek && range_basek <= (basek + sizek)) - second_sizek = range_basek - basek; - - if (range0_sizek > state->range_sizek) { - - /* one hole in middle or at end */ - hole_sizek = range0_sizek - state->range_sizek - second_sizek; - - /* hole size should be less than half of range0 size */ - if (hole_sizek >= (range0_sizek >> 1) && - range0_sizek >= chunk_sizek) { - range0_sizek -= chunk_sizek; - second_sizek = 0; - hole_sizek = 0; - - goto second_try; - } - } - - if (range0_sizek) { - if (debug_print) - printk(KERN_DEBUG "range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK); - } - - if (range0_sizek < state->range_sizek) { - /* need to handle left over */ - range_sizek = state->range_sizek - range0_sizek; - - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", - range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, MTRR_TYPE_WRBACK); - } - - if (hole_sizek) { - hole_basek = range_basek - hole_sizek - second_sizek; - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, MTRR_TYPE_UNCACHABLE); - } - - return second_sizek; -} - -static void __init -set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, - unsigned long size_pfn) -{ - unsigned long basek, sizek; - unsigned long second_sizek = 0; - - if (state->reg >= num_var_ranges) - return; - - basek = base_pfn << (PAGE_SHIFT - 10); - sizek = size_pfn << (PAGE_SHIFT - 10); - - /* See if I can merge with the last range */ - if ((basek <= 1024) || - (state->range_startk + state->range_sizek == basek)) { - unsigned long endk = basek + sizek; - state->range_sizek = endk - state->range_startk; - return; - } - /* Write the range mtrrs */ - if (state->range_sizek != 0) - second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - - /* Allocate an msr */ - state->range_startk = basek + second_sizek; - state->range_sizek = sizek - second_sizek; -} - -/* mininum size of mtrr block that can take hole */ -static u64 mtrr_chunk_size __initdata = (256ULL<<20); - -static int __init parse_mtrr_chunk_size_opt(char *p) -{ - if (!p) - return -EINVAL; - mtrr_chunk_size = memparse(p, &p); - return 0; -} -early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); - -/* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata; - -static int __init parse_mtrr_gran_size_opt(char *p) -{ - if (!p) - return -EINVAL; - mtrr_gran_size = memparse(p, &p); - return 0; -} -early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); - -static int nr_mtrr_spare_reg __initdata = - CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; - -static int __init parse_mtrr_spare_reg(char *arg) -{ - if (arg) - nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); - return 0; -} - -early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); - -static int __init -x86_setup_var_mtrrs(struct res_range *range, int nr_range, - u64 chunk_size, u64 gran_size) -{ - struct var_mtrr_state var_state; - int i; - int num_reg; - - var_state.range_startk = 0; - var_state.range_sizek = 0; - var_state.reg = 0; - var_state.chunk_sizek = chunk_size >> 10; - var_state.gran_sizek = gran_size >> 10; - - memset(range_state, 0, sizeof(range_state)); - - /* Write the range etc */ - for (i = 0; i < nr_range; i++) - set_var_mtrr_range(&var_state, range[i].start, - range[i].end - range[i].start + 1); - - /* Write the last range */ - if (var_state.range_sizek != 0) - range_to_mtrr_with_hole(&var_state, 0, 0); - - num_reg = var_state.reg; - /* Clear out the extra MTRR's */ - while (var_state.reg < num_var_ranges) { - save_var_mtrr(var_state.reg, 0, 0, 0); - var_state.reg++; - } - - return num_reg; -} - -struct mtrr_cleanup_result { - unsigned long gran_sizek; - unsigned long chunk_sizek; - unsigned long lose_cover_sizek; - unsigned int num_reg; - int bad; -}; - -/* - * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G - * chunk size: gran_size, ..., 2G - * so we need (1+16)*8 - */ -#define NUM_RESULT 136 -#define PSHIFT (PAGE_SHIFT - 10) - -static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; -static unsigned long __initdata min_loss_pfn[RANGE_NUM]; - -static void __init print_out_mtrr_range_state(void) -{ - int i; - char start_factor = 'K', size_factor = 'K'; - unsigned long start_base, size_base; - mtrr_type type; - - for (i = 0; i < num_var_ranges; i++) { - - size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); - if (!size_base) - continue; - - size_base = to_size_factor(size_base, &size_factor), - start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); - start_base = to_size_factor(start_base, &start_factor), - type = range_state[i].type; - - printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", - i, start_base, start_factor, - size_base, size_factor, - (type == MTRR_TYPE_UNCACHABLE) ? "UC" : - ((type == MTRR_TYPE_WRPROT) ? "WP" : - ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) - ); - } -} - -static int __init mtrr_need_cleanup(void) -{ - int i; - mtrr_type type; - unsigned long size; - /* extra one for all 0 */ - int num[MTRR_NUM_TYPES + 1]; - - /* check entries number */ - memset(num, 0, sizeof(num)); - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - size = range_state[i].size_pfn; - if (type >= MTRR_NUM_TYPES) - continue; - if (!size) - type = MTRR_NUM_TYPES; - if (type == MTRR_TYPE_WRPROT) - type = MTRR_TYPE_UNCACHABLE; - num[type]++; - } - - /* check if we got UC entries */ - if (!num[MTRR_TYPE_UNCACHABLE]) - return 0; - - /* check if we only had WB and UC */ - if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != - num_var_ranges - num[MTRR_NUM_TYPES]) - return 0; - - return 1; -} - -static unsigned long __initdata range_sums; -static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, - unsigned long extra_remove_base, - unsigned long extra_remove_size, - int i) -{ - int num_reg; - static struct res_range range_new[RANGE_NUM]; - static int nr_range_new; - unsigned long range_sums_new; - - /* convert ranges to var ranges state */ - num_reg = x86_setup_var_mtrrs(range, nr_range, - chunk_size, gran_size); - - /* we got new setting in range_state, check it */ - memset(range_new, 0, sizeof(range_new)); - nr_range_new = x86_get_mtrr_mem_range(range_new, 0, - extra_remove_base, extra_remove_size); - range_sums_new = sum_ranges(range_new, nr_range_new); - - result[i].chunk_sizek = chunk_size >> 10; - result[i].gran_sizek = gran_size >> 10; - result[i].num_reg = num_reg; - if (range_sums < range_sums_new) { - result[i].lose_cover_sizek = - (range_sums_new - range_sums) << PSHIFT; - result[i].bad = 1; - } else - result[i].lose_cover_sizek = - (range_sums - range_sums_new) << PSHIFT; - - /* double check it */ - if (!result[i].bad && !result[i].lose_cover_sizek) { - if (nr_range_new != nr_range || - memcmp(range, range_new, sizeof(range))) - result[i].bad = 1; - } - - if (!result[i].bad && (range_sums - range_sums_new < - min_loss_pfn[num_reg])) { - min_loss_pfn[num_reg] = - range_sums - range_sums_new; - } -} - -static void __init mtrr_print_out_one_result(int i) -{ - char gran_factor, chunk_factor, lose_factor; - unsigned long gran_base, chunk_base, lose_base; - - gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), - chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), - lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), - printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", - result[i].bad ? "*BAD*" : " ", - gran_base, gran_factor, chunk_base, chunk_factor); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", - result[i].num_reg, result[i].bad ? "-" : "", - lose_base, lose_factor); -} - -static int __init mtrr_search_optimal_index(void) -{ - int i; - int num_reg_good; - int index_good; - - if (nr_mtrr_spare_reg >= num_var_ranges) - nr_mtrr_spare_reg = num_var_ranges - 1; - num_reg_good = -1; - for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { - if (!min_loss_pfn[i]) - num_reg_good = i; - } - - index_good = -1; - if (num_reg_good != -1) { - for (i = 0; i < NUM_RESULT; i++) { - if (!result[i].bad && - result[i].num_reg == num_reg_good && - !result[i].lose_cover_sizek) { - index_good = i; - break; - } - } - } - - return index_good; -} - - -static int __init mtrr_cleanup(unsigned address_bits) -{ - unsigned long extra_remove_base, extra_remove_size; - unsigned long base, size, def, dummy; - mtrr_type type; - u64 chunk_size, gran_size; - int index_good; - int i; - - if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) - return 0; - rdmsr(MTRRdefType_MSR, def, dummy); - def &= 0xff; - if (def != MTRR_TYPE_UNCACHABLE) - return 0; - - /* get it and store it aside */ - memset(range_state, 0, sizeof(range_state)); - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - range_state[i].base_pfn = base; - range_state[i].size_pfn = size; - range_state[i].type = type; - } - - /* check if we need handle it and can handle it */ - if (!mtrr_need_cleanup()) - return 0; - - /* print original var MTRRs at first, for debugging: */ - printk(KERN_DEBUG "original variable MTRRs\n"); - print_out_mtrr_range_state(); - - memset(range, 0, sizeof(range)); - extra_remove_size = 0; - extra_remove_base = 1 << (32 - PAGE_SHIFT); - if (mtrr_tom2) - extra_remove_size = - (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, - extra_remove_size); - /* - * [0, 1M) should always be coverred by var mtrr with WB - * and fixed mtrrs should take effective before var mtrr for it - */ - nr_range = add_range_with_merge(range, nr_range, 0, - (1ULL<<(20 - PAGE_SHIFT)) - 1); - /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - - range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM coverred: %ldM\n", - range_sums >> (20 - PAGE_SHIFT)); - - if (mtrr_chunk_size && mtrr_gran_size) { - i = 0; - mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, - extra_remove_base, extra_remove_size, i); - - mtrr_print_out_one_result(i); - - if (!result[i].bad) { - set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); - print_out_mtrr_range_state(); - return 1; - } - printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " - "will find optimal one\n"); - } - - i = 0; - memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); - memset(result, 0, sizeof(result)); - for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { - - for (chunk_size = gran_size; chunk_size < (1ULL<<32); - chunk_size <<= 1) { - - if (i >= NUM_RESULT) - continue; - - mtrr_calc_range_state(chunk_size, gran_size, - extra_remove_base, extra_remove_size, i); - if (debug_print) { - mtrr_print_out_one_result(i); - printk(KERN_INFO "\n"); - } - - i++; - } - } - - /* try to find the optimal index */ - index_good = mtrr_search_optimal_index(); - - if (index_good != -1) { - printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); - i = index_good; - mtrr_print_out_one_result(i); - - /* convert ranges to var ranges state */ - chunk_size = result[i].chunk_sizek; - chunk_size <<= 10; - gran_size = result[i].gran_sizek; - gran_size <<= 10; - x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); - print_out_mtrr_range_state(); - return 1; - } else { - /* print out all */ - for (i = 0; i < NUM_RESULT; i++) - mtrr_print_out_one_result(i); - } - - printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); - printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); - - return 0; -} -#else -static int __init mtrr_cleanup(unsigned address_bits) -{ - return 0; -} -#endif - -static int __initdata changed_by_mtrr_cleanup; - -static int disable_mtrr_trim; - -static int __init disable_mtrr_trim_setup(char *str) -{ - disable_mtrr_trim = 1; - return 0; -} -early_param("disable_mtrr_trim", disable_mtrr_trim_setup); - -/* - * Newer AMD K8s and later CPUs have a special magic MSR way to force WB - * for memory >4GB. Check for that here. - * Note this won't check if the MTRRs < 4GB where the magic bit doesn't - * apply to are wrong, but so far we don't know of any such case in the wild. - */ -#define Tom2Enabled (1U << 21) -#define Tom2ForceMemTypeWB (1U << 22) - -int __init amd_special_default_mtrr(void) -{ - u32 l, h; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return 0; - if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) - return 0; - /* In case some hypervisor doesn't pass SYSCFG through */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) - return 0; - /* - * Memory between 4GB and top of mem is forced WB by this magic bit. - * Reserved before K8RevF, but should be zero there. - */ - if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == - (Tom2Enabled | Tom2ForceMemTypeWB)) - return 1; - return 0; -} - -static u64 __init real_trim_memory(unsigned long start_pfn, - unsigned long limit_pfn) -{ - u64 trim_start, trim_size; - trim_start = start_pfn; - trim_start <<= PAGE_SHIFT; - trim_size = limit_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - - return e820_update_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); -} -/** - * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs - * @end_pfn: ending page frame number - * - * Some buggy BIOSes don't setup the MTRRs properly for systems with certain - * memory configurations. This routine checks that the highest MTRR matches - * the end of memory, to make sure the MTRRs having a write back type cover - * all of the memory the kernel is intending to use. If not, it'll trim any - * memory off the end by adjusting end_pfn, removing it from the kernel's - * allocation pools, warning the user with an obnoxious message. - */ -int __init mtrr_trim_uncached_memory(unsigned long end_pfn) -{ - unsigned long i, base, size, highest_pfn = 0, def, dummy; - mtrr_type type; - u64 total_trim_size; - - /* extra one for all 0 */ - int num[MTRR_NUM_TYPES + 1]; - /* - * Make sure we only trim uncachable memory on machines that - * support the Intel MTRR architecture: - */ - if (!is_cpu(INTEL) || disable_mtrr_trim) - return 0; - rdmsr(MTRRdefType_MSR, def, dummy); - def &= 0xff; - if (def != MTRR_TYPE_UNCACHABLE) - return 0; - - /* get it and store it aside */ - memset(range_state, 0, sizeof(range_state)); - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - range_state[i].base_pfn = base; - range_state[i].size_pfn = size; - range_state[i].type = type; - } - - /* Find highest cached pfn */ - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type != MTRR_TYPE_WRBACK) - continue; - base = range_state[i].base_pfn; - size = range_state[i].size_pfn; - if (highest_pfn < base + size) - highest_pfn = base + size; - } - - /* kvm/qemu doesn't have mtrr set right, don't trim them all */ - if (!highest_pfn) { - printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); - return 0; - } - - /* check entries number */ - memset(num, 0, sizeof(num)); - for (i = 0; i < num_var_ranges; i++) { - type = range_state[i].type; - if (type >= MTRR_NUM_TYPES) - continue; - size = range_state[i].size_pfn; - if (!size) - type = MTRR_NUM_TYPES; - num[type]++; - } - - /* no entry for WB? */ - if (!num[MTRR_TYPE_WRBACK]) - return 0; - - /* check if we only had WB and UC */ - if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != - num_var_ranges - num[MTRR_NUM_TYPES]) - return 0; - - memset(range, 0, sizeof(range)); - nr_range = 0; - if (mtrr_tom2) { - range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); - range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; - if (highest_pfn < range[nr_range].end + 1) - highest_pfn = range[nr_range].end + 1; - nr_range++; - } - nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - - total_trim_size = 0; - /* check the head */ - if (range[0].start) - total_trim_size += real_trim_memory(0, range[0].start); - /* check the holes */ - for (i = 0; i < nr_range - 1; i++) { - if (range[i].end + 1 < range[i+1].start) - total_trim_size += real_trim_memory(range[i].end + 1, - range[i+1].start); - } - /* check the top */ - i = nr_range - 1; - if (range[i].end + 1 < end_pfn) - total_trim_size += real_trim_memory(range[i].end + 1, - end_pfn); - - if (total_trim_size) { - printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %lluMB of RAM.\n", - total_trim_size >> 20); - - if (!changed_by_mtrr_cleanup) - WARN_ON(1); - - printk(KERN_INFO "update e820 for mtrr\n"); - update_e820(); - - return 1; - } - - return 0; -} +int __initdata changed_by_mtrr_cleanup; /** * mtrr_bp_init - initialize mtrrs on the boot CPU diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index ffd60409cc6d..6710e93021a7 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -88,3 +88,6 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned); int amd_init_mtrr(void); int cyrix_init_mtrr(void); int centaur_init_mtrr(void); + +extern int changed_by_mtrr_cleanup; +extern int mtrr_cleanup(unsigned address_bits); -- cgit v1.2.3 From 91219bcbdcccc1686b0ecce09e28825c93619c07 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 12 Mar 2009 02:37:00 +0530 Subject: x86: cpu_debug add write support for MSRs Supported write flag for registers. currently write is enabled only for PMC MSR. [root@ht]# cat /sys/kernel/debug/x86/cpu/cpu1/pmc/0x300/value 0x0 [root@ht]# echo 1234 > /sys/kernel/debug/x86/cpu/cpu1/pmc/0x300/value [root@ht]# cat /sys/kernel/debug/x86/cpu/cpu1/pmc/0x300/value 0x4d2 [root@ht]# echo 0x1234 > /sys/kernel/debug/x86/cpu/cpu1/pmc/0x300/value [root@ht]# cat /sys/kernel/debug/x86/cpu/cpu1/pmc/0x300/value 0x1234 Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_debug.h | 16 ++++-- arch/x86/kernel/cpu/cpu_debug.c | 118 ++++++++++++++++++++++++++++----------- 2 files changed, 97 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index d24d64fcee04..56f1635e4617 100755 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h @@ -171,16 +171,22 @@ struct cpu_private { struct cpu_debug_base { char *name; /* Register name */ unsigned flag; /* Register flag */ + unsigned write; /* Register write flag */ }; -struct cpu_cpuX_base { - struct dentry *dentry; /* Register dentry */ - int init; /* Register index file */ -}; - +/* + * Currently it looks similar to cpu_debug_base but once we add more files + * cpu_file_base will go in different direction + */ struct cpu_file_base { char *name; /* Register file name */ unsigned flag; /* Register file flag */ + unsigned write; /* Register write flag */ +}; + +struct cpu_cpuX_base { + struct dentry *dentry; /* Register dentry */ + int init; /* Register index file */ }; struct cpu_debug_range { diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 9abbcbd933cc..21c0cf8ced18 100755 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -40,41 +41,41 @@ static DEFINE_MUTEX(cpu_debug_lock); static struct dentry *cpu_debugfs_dir; static struct cpu_debug_base cpu_base[] = { - { "mc", CPU_MC }, /* Machine Check */ - { "monitor", CPU_MONITOR }, /* Monitor */ - { "time", CPU_TIME }, /* Time */ - { "pmc", CPU_PMC }, /* Performance Monitor */ - { "platform", CPU_PLATFORM }, /* Platform */ - { "apic", CPU_APIC }, /* APIC */ - { "poweron", CPU_POWERON }, /* Power-on */ - { "control", CPU_CONTROL }, /* Control */ - { "features", CPU_FEATURES }, /* Features control */ - { "lastbranch", CPU_LBRANCH }, /* Last Branch */ - { "bios", CPU_BIOS }, /* BIOS */ - { "freq", CPU_FREQ }, /* Frequency */ - { "mtrr", CPU_MTRR }, /* MTRR */ - { "perf", CPU_PERF }, /* Performance */ - { "cache", CPU_CACHE }, /* Cache */ - { "sysenter", CPU_SYSENTER }, /* Sysenter */ - { "therm", CPU_THERM }, /* Thermal */ - { "misc", CPU_MISC }, /* Miscellaneous */ - { "debug", CPU_DEBUG }, /* Debug */ - { "pat", CPU_PAT }, /* PAT */ - { "vmx", CPU_VMX }, /* VMX */ - { "call", CPU_CALL }, /* System Call */ - { "base", CPU_BASE }, /* BASE Address */ - { "smm", CPU_SMM }, /* System mgmt mode */ - { "svm", CPU_SVM }, /*Secure Virtial Machine*/ - { "osvm", CPU_OSVM }, /* OS-Visible Workaround*/ - { "tss", CPU_TSS }, /* Task Stack Segment */ - { "cr", CPU_CR }, /* Control Registers */ - { "dt", CPU_DT }, /* Descriptor Table */ - { "registers", CPU_REG_ALL }, /* Select all Registers */ + { "mc", CPU_MC, 0 }, + { "monitor", CPU_MONITOR, 0 }, + { "time", CPU_TIME, 0 }, + { "pmc", CPU_PMC, 1 }, + { "platform", CPU_PLATFORM, 0 }, + { "apic", CPU_APIC, 0 }, + { "poweron", CPU_POWERON, 0 }, + { "control", CPU_CONTROL, 0 }, + { "features", CPU_FEATURES, 0 }, + { "lastbranch", CPU_LBRANCH, 0 }, + { "bios", CPU_BIOS, 0 }, + { "freq", CPU_FREQ, 0 }, + { "mtrr", CPU_MTRR, 0 }, + { "perf", CPU_PERF, 0 }, + { "cache", CPU_CACHE, 0 }, + { "sysenter", CPU_SYSENTER, 0 }, + { "therm", CPU_THERM, 0 }, + { "misc", CPU_MISC, 0 }, + { "debug", CPU_DEBUG, 0 }, + { "pat", CPU_PAT, 0 }, + { "vmx", CPU_VMX, 0 }, + { "call", CPU_CALL, 0 }, + { "base", CPU_BASE, 0 }, + { "smm", CPU_SMM, 0 }, + { "svm", CPU_SVM, 0 }, + { "osvm", CPU_OSVM, 0 }, + { "tss", CPU_TSS, 0 }, + { "cr", CPU_CR, 0 }, + { "dt", CPU_DT, 0 }, + { "registers", CPU_REG_ALL, 0 }, }; static struct cpu_file_base cpu_file[] = { - { "index", CPU_REG_ALL }, /* index */ - { "value", CPU_REG_ALL }, /* value */ + { "index", CPU_REG_ALL, 0 }, + { "value", CPU_REG_ALL, 1 }, }; /* Intel Registers Range */ @@ -608,9 +609,62 @@ static int cpu_seq_open(struct inode *inode, struct file *file) return err; } +static int write_msr(struct cpu_private *priv, u64 val) +{ + u32 low, high; + + high = (val >> 32) & 0xffffffff; + low = val & 0xffffffff; + + if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) + return 0; + + return -EPERM; +} + +static int write_cpu_register(struct cpu_private *priv, const char *buf) +{ + int ret = -EPERM; + u64 val; + + ret = strict_strtoull(buf, 0, &val); + if (ret < 0) + return ret; + + /* Supporting only MSRs */ + if (priv->type < CPU_TSS_BIT) + return write_msr(priv, val); + + return ret; +} + +static ssize_t cpu_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct cpu_private *priv = seq->private; + char buf[19]; + + if ((priv == NULL) || (count >= sizeof(buf))) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, count)) + return -EFAULT; + + buf[count] = 0; + + if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) + if (!write_cpu_register(priv, buf)) + return count; + + return -EACCES; +} + static const struct file_operations cpu_fops = { + .owner = THIS_MODULE, .open = cpu_seq_open, .read = seq_read, + .write = cpu_write, .llseek = seq_lseek, .release = seq_release, }; -- cgit v1.2.3 From 773e673de27297d07d852e7e9bfd1a695cae1da2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 Mar 2009 21:35:18 -0700 Subject: x86: fix e820_update_range() Impact: fix left range size on head | commit 5c0e6f035df983210e4d22213aed624ced502d3d | x86: fix code paths used by update_mptable | Impact: fix crashes under Xen due to unrobust e820 code fixes one e820 bug, but introduces another bug. Need to update size for left range at first in case it is header. also add __e820_add_region take more parameter. Signed-off-by: Yinghai Lu Cc: jbeulich@novell.com LKML-Reference: <49B9E286.502@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 3cf6681ac80d..95b81c18b6bc 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -110,19 +110,25 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) /* * Add a memory region to the kernel e820 map. */ -void __init e820_add_region(u64 start, u64 size, int type) +static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, + int type) { - int x = e820.nr_map; + int x = e820x->nr_map; - if (x == ARRAY_SIZE(e820.map)) { + if (x == ARRAY_SIZE(e820x->map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; + e820x->map[x].addr = start; + e820x->map[x].size = size; + e820x->map[x].type = type; + e820x->nr_map++; +} + +void __init e820_add_region(u64 start, u64 size, int type) +{ + __e820_add_region(&e820, start, size, type); } void __init e820_print_map(char *who) @@ -417,11 +423,11 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map) return __append_e820_map(biosmap, nr_map); } -static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, +static u64 __init __e820_update_range(struct e820map *e820x, u64 start, u64 size, unsigned old_type, unsigned new_type) { - unsigned int i, x; + unsigned int i; u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -447,22 +453,19 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, if (final_start >= final_end) continue; - x = e820x->nr_map; - if (x == ARRAY_SIZE(e820x->map)) { - printk(KERN_ERR "Too many memory map entries!\n"); - break; - } - e820x->map[x].addr = final_start; - e820x->map[x].size = final_end - final_start; - e820x->map[x].type = new_type; - e820x->nr_map++; + __e820_add_region(e820x, final_start, final_end - final_start, + new_type); real_updated_size += final_end - final_start; + /* + * left range could be head or tail, so need to update + * size at first. + */ + ei->size -= final_end - final_start; if (ei->addr < final_start) continue; ei->addr = final_end; - ei->size -= final_end - final_start; } return real_updated_size; } @@ -470,13 +473,13 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return e820_update_range_map(&e820, start, size, old_type, new_type); + return __e820_update_range(&e820, start, size, old_type, new_type); } static u64 __init e820_update_range_saved(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return e820_update_range_map(&e820_saved, start, size, old_type, + return __e820_update_range(&e820_saved, start, size, old_type, new_type); } -- cgit v1.2.3 From 3ff42da5048649503e343a32be37b14a6a4e8aaf Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 12 Mar 2009 17:39:37 +0100 Subject: x86: mtrr: don't modify RdDram/WrDram bits of fixed MTRRs Impact: bug fix + BIOS workaround BIOS is expected to clear the SYSCFG[MtrrFixDramModEn] on AMD CPUs after fixed MTRRs are configured. Some BIOSes do not clear SYSCFG[MtrrFixDramModEn] on BP (and on APs). This can lead to obfuscation in Linux when this bit is not cleared on BP but cleared on APs. A consequence of this is that the saved fixed-MTRR state (from BP) differs from the fixed-MTRRs of APs -- because RdDram/WrDram bits are read as zero when SYSCFG[MtrrFixDramModEn] is cleared -- and Linux tries to sync fixed-MTRR state from BP to AP. This implies that Linux sets SYSCFG[MtrrFixDramEn] and activates those bits. More important is that (some) systems change these bits in SMM when ACPI is enabled. Hence it is racy if Linux modifies RdMem/WrMem bits, too. (1) The patch modifies an old fix from Bernhard Kaindl to get suspend/resume working on some Acer Laptops. Bernhard's patch tried to sync RdMem/WrMem bits of fixed MTRR registers and that helped on those old Laptops. (Don't ask me why -- can't test it myself). But this old problem was not the motivation for the patch. (See http://lkml.org/lkml/2007/4/3/110) (2) The more important effect is to fix issues on some more current systems. On those systems Linux panics or just freezes, see http://bugzilla.kernel.org/show_bug.cgi?id=11541 (and also duplicates of this bug: http://bugzilla.kernel.org/show_bug.cgi?id=11737 http://bugzilla.kernel.org/show_bug.cgi?id=11714) The affected systems boot only using acpi=ht, acpi=off or when the kernel is built with CONFIG_MTRR=n. The acpi options prevent full enablement of ACPI. Obviously when ACPI is enabled the BIOS/SMM modfies RdMem/WrMem bits. When CONFIG_MTRR=y Linux also accesses and modifies those bits when it needs to sync fixed-MTRRs across cores (Bernhard's fix, see (1)). How do you synchronize that? You can't. As a consequence Linux shouldn't touch those bits at all (Rationale are AMD's BKDGs which recommend to clear the bit that makes RdMem/WrMem accessible). This is the purpose of this patch. And (so far) this suffices to fix (1) and (2). I suggest not to touch RdDram/WrDram bits of fixed-MTRRs and SYSCFG[MtrrFixDramEn] and to clear SYSCFG[MtrrFixDramModEn] as suggested by AMD K8, and AMD family 10h/11h BKDGs. BIOS is expected to do this anyway. This should avoid that Linux and SMM tread on each other's toes ... Signed-off-by: Andreas Herrmann Cc: trenn@suse.de Cc: Yinghai Lu LKML-Reference: <20090312163937.GH20716@alberich.amd.com> Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 51 ++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 964403520100..950c434f793d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -33,6 +33,32 @@ u64 mtrr_tom2; struct mtrr_state_type mtrr_state = {}; EXPORT_SYMBOL_GPL(mtrr_state); +/** + * BIOS is expected to clear MtrrFixDramModEn bit, see for example + * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD + * Opteron Processors" (26094 Rev. 3.30 February 2006), section + * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set + * to 1 during BIOS initalization of the fixed MTRRs, then cleared to + * 0 for operation." + */ +static inline void k8_check_syscfg_dram_mod_en(void) +{ + u32 lo, hi; + + if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (boot_cpu_data.x86 >= 0x0f))) + return; + + rdmsr(MSR_K8_SYSCFG, lo, hi); + if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { + printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" + " not cleared by BIOS, clearing this bit\n", + smp_processor_id()); + lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; + mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); + } +} + /* * Returns the effective MTRR type for the region * Error returns: @@ -166,6 +192,8 @@ get_fixed_ranges(mtrr_type * frs) unsigned int *p = (unsigned int *) frs; int i; + k8_check_syscfg_dram_mod_en(); + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); for (i = 0; i < 2; i++) @@ -305,28 +333,11 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) smp_processor_id(), msr, a, b); } -/** - * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs - * see AMD publication no. 24593, chapter 3.2.1 for more information - */ -static inline void k8_enable_fixed_iorrs(void) -{ - unsigned lo, hi; - - rdmsr(MSR_K8_SYSCFG, lo, hi); - mtrr_wrmsr(MSR_K8_SYSCFG, lo - | K8_MTRRFIXRANGE_DRAM_ENABLE - | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); -} - /** * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have * @msr: MSR address of the MTTR which should be checked and updated * @changed: pointer which indicates whether the MTRR needed to be changed * @msrwords: pointer to the MSR values which the MSR should have - * - * If K8 extentions are wanted, update the K8 SYSCFG MSR also. - * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information. */ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) { @@ -335,10 +346,6 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) rdmsr(msr, lo, hi); if (lo != msrwords[0] || hi != msrwords[1]) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && - ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) - k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); *changed = true; } @@ -426,6 +433,8 @@ static int set_fixed_ranges(mtrr_type * frs) bool changed = false; int block=-1, range; + k8_check_syscfg_dram_mod_en(); + while (fixed_range_blocks[++block].ranges) for (range=0; range < fixed_range_blocks[block].ranges; range++) set_fixed_range(fixed_range_blocks[block].base_msr + range, -- cgit v1.2.3 From 5a8ac9d28dae5330c70562c7d7785f5104059c17 Mon Sep 17 00:00:00 2001 From: Américo Wang Date: Fri, 13 Mar 2009 15:56:58 +0800 Subject: x86: ptrace, bts: fix an unreachable statement Commit c2724775ce57c98b8af9694857b941dc61056516 put a statement after return, which makes that statement unreachable. Move that statement before return. Signed-off-by: WANG Cong Cc: Roland McGrath Cc: Markus Metzger LKML-Reference: <20090313075622.GB8933@hack> Cc: # .29 only Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3d9672e59c16..19378715f415 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -685,9 +685,8 @@ static int ptrace_bts_config(struct task_struct *child, if (!cfg.signal) return -EINVAL; - return -EOPNOTSUPP; - child->thread.bts_ovfl_signal = cfg.signal; + return -EOPNOTSUPP; } if ((cfg.flags & PTRACE_BTS_O_ALLOC) && -- cgit v1.2.3 From 9766cdbcb260389669e9679b2aa87c11832f479f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 14 Mar 2009 11:19:49 +0530 Subject: x86: cpu/common.c cleanups - fix various style problems - declare varibles before they get used - introduced clear_all_debug_regs - fix header files issues LKML-Reference: <1237009789.4387.2.camel@localhost.localdomain> Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 3 + arch/x86/kernel/cpu/common.c | 129 ++++++++++++++++++++------------------- 2 files changed, 68 insertions(+), 64 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e4..dccef5be0fc1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -391,6 +391,9 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); +DECLARE_PER_CPU(unsigned int, irq_count); +extern unsigned long kernel_eflags; +extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR DECLARE_PER_CPU(unsigned long, stack_canary); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 826d5c876278..cad6878c88db 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,52 +1,52 @@ -#include -#include -#include -#include +#include #include +#include #include +#include #include -#include -#include +#include +#include #include +#include +#include +#include #include -#include -#include -#include -#include -#include +#include + +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include #include +#include #include -#include -#include #include -#include -#include -#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include "cpu.h" #ifdef CONFIG_X86_64 /* all of these masks are initialized in setup_cpu_local_masks() */ -cpumask_var_t cpu_callin_mask; -cpumask_var_t cpu_callout_mask; cpumask_var_t cpu_initialized_mask; +cpumask_var_t cpu_callout_mask; +cpumask_var_t cpu_callin_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; @@ -62,10 +62,10 @@ void __init setup_cpu_local_masks(void) #else /* CONFIG_X86_32 */ -cpumask_t cpu_callin_map; +cpumask_t cpu_sibling_setup_map; cpumask_t cpu_callout_map; cpumask_t cpu_initialized; -cpumask_t cpu_sibling_setup_map; +cpumask_t cpu_callin_map; #endif /* CONFIG_X86_32 */ @@ -79,7 +79,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout * - * The TLS descriptors are currently at a different place compared to i386. + * TLS descriptors are currently at a different place compared to i386. * Hopefully nobody expects them at a fixed place (Wine?) */ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, @@ -243,6 +243,7 @@ cpuid_dependent_features[] = { static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) { const struct cpuid_dependent_feature *df; + for (df = cpuid_dependent_features; df->feature; df++) { /* * Note: cpuid_level is set to -1 if unavailable, but @@ -364,12 +365,12 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) undo that brain damage */ p = q = &c->x86_model_id[0]; while (*p == ' ') - p++; + p++; if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ + while (*p) + *q++ = *p++; + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ } } @@ -441,14 +442,15 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1) { if (smp_num_siblings > nr_cpu_ids) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); + pr_warning("CPU: Unsupported number of siblings %d", + smp_num_siblings); smp_num_siblings = 1; return; } index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, + index_msb); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -491,7 +493,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); + printk(KERN_ERR "CPU: vendor_id '%s'" + "unknown, using generic init.\n", v); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } @@ -637,7 +640,7 @@ void __init early_cpu_init(void) struct cpu_dev **cdev; int count = 0; - printk("KERNEL supported cpus:\n"); + printk(KERN_INFO "KERNEL supported cpus:\n"); for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { struct cpu_dev *cpudev = *cdev; unsigned int j; @@ -650,7 +653,7 @@ void __init early_cpu_init(void) for (j = 0; j < 2; j++) { if (!cpudev->c_ident[j]) continue; - printk(" %s %s\n", cpudev->c_vendor, + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, cpudev->c_ident[j]); } } @@ -952,8 +955,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); -extern asmlinkage void ignore_sysret(void); - /* May not be marked __init: used by software suspend */ void syscall_init(void) { @@ -999,6 +1000,22 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) } #endif /* x86_64 */ +/* + * Clear all 6 debug registers: + */ +static void clear_all_debug_regs(void) +{ + int i; + + for (i = 0; i < 8; i++) { + /* Ignore db4, db5 */ + if ((i == 4) || (i == 5)) + continue; + + set_debugreg(0, i); + } +} + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT @@ -1098,17 +1115,7 @@ void __cpuinit cpu_init(void) arch_kgdb_ops.correct_hw_break(); else #endif - { - /* - * Clear all 6 debug registers: - */ - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); - } + clear_all_debug_regs(); fpu_init(); @@ -1129,7 +1136,8 @@ void __cpuinit cpu_init(void) if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); + for (;;) + local_irq_enable(); } printk(KERN_INFO "Initializing CPU#%d\n", cpu); @@ -1159,13 +1167,7 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); + clear_all_debug_regs(); /* * Force FPU initialization: @@ -1186,5 +1188,4 @@ void __cpuinit cpu_init(void) xsave_init(); } - #endif -- cgit v1.2.3 From 88200bc28da38bcda1cb1bd218216e83b426d8a8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 14 Mar 2009 12:08:13 +0530 Subject: x86: entry_32.S fix compile warnings - fix work mask bit width Fix: arch/x86/kernel/entry_32.S:446: Warning: 00000000080001d1 shortened to 00000000000001d1 arch/x86/kernel/entry_32.S:457: Warning: 000000000800feff shortened to 000000000000feff arch/x86/kernel/entry_32.S:527: Warning: 00000000080001d1 shortened to 00000000000001d1 arch/x86/kernel/entry_32.S:541: Warning: 000000000800feff shortened to 000000000000feff arch/x86/kernel/entry_32.S:676: Warning: 0000000008000091 shortened to 0000000000000091 TIF_SYSCALL_FTRACE is 0x08000000 and until now we checked the first 16 bits of the work mask - bit 27 falls outside of that. Update the entry_32.S code to check the full 32-bit mask. [ %cx => %ecx fix from Cyrill Gorcunov ] Signed-off-by: Jaswinder Singh Rajput Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: "H. Peter Anvin" LKML-Reference: <1237012693.18733.3.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 899e8938e79f..c929add475c9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -442,8 +442,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz sysenter_audit sysenter_do_call: cmpl $(nr_syscalls), %eax @@ -454,7 +453,7 @@ sysenter_do_call: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx + testl $_TIF_ALLWORK_MASK, %ecx jne sysexit_audit sysenter_exit: /* if something modifies registers it must also disable sysexit */ @@ -468,7 +467,7 @@ sysenter_exit: #ifdef CONFIG_AUDITSYSCALL sysenter_audit: - testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry addl $4,%esp CFI_ADJUST_CFA_OFFSET -4 @@ -485,7 +484,7 @@ sysenter_audit: jmp sysenter_do_call sysexit_audit: - testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx jne syscall_exit_work TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) @@ -498,7 +497,7 @@ sysexit_audit: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx - testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx jne syscall_exit_work movl PT_EAX(%esp),%eax /* reload syscall return value */ jmp sysenter_exit @@ -523,8 +522,7 @@ ENTRY(system_call) SAVE_ALL GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -538,7 +536,7 @@ syscall_exit: # between sampling and the iret TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx # current->work + testl $_TIF_ALLWORK_MASK, %ecx # current->work jne syscall_exit_work restore_all: @@ -673,7 +671,7 @@ END(syscall_trace_entry) # perform syscall exit tracing ALIGN syscall_exit_work: - testb $_TIF_WORK_SYSCALL_EXIT, %cl + testl $_TIF_WORK_SYSCALL_EXIT, %ecx jz work_pending TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call -- cgit v1.2.3 From 0f3fa48a7eaf5d1118cfda1650e8c759b2a116e4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Mar 2009 08:46:17 +0100 Subject: x86: cpu/common.c more cleanups Complete/fix the cleanups of cpu/common.c: - fix ugly warning due to asm/topology.h -> linux/topology.h change - standardize the style across the file - simplify/refactor the code flow where possible Cc: Jaswinder Singh Rajput LKML-Reference: <1237009789.4387.2.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 243 +++++++++++++++++++++++++------------------ 1 file changed, 143 insertions(+), 100 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cad6878c88db..a9e3791ca098 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -18,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -82,45 +82,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { * TLS descriptors are currently at a different place compared to i386. * Hopefully nobody expects them at a fixed place (Wine?) */ - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, #else - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, /* * Segments used for calling PnP BIOS have byte granularity. * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. */ /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, GDT_STACK_CANARY_INIT #endif } }; @@ -164,16 +164,17 @@ static inline int flag_is_changeable_p(u32 flag) * the CPUID. Add "volatile" to not allow gcc to * optimize the subsequent calls to this function. */ - asm volatile ("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" + asm volatile ("pushfl \n\t" + "pushfl \n\t" + "popl %0 \n\t" + "movl %0, %1 \n\t" + "xorl %2, %0 \n\t" + "pushl %0 \n\t" + "popfl \n\t" + "pushfl \n\t" + "popl %0 \n\t" + "popfl \n\t" + : "=&r" (f1), "=&r" (f2) : "ir" (flag)); @@ -188,18 +189,22 @@ static int __cpuinit have_cpuid_p(void) static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } + unsigned long lo, hi; + + if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) + return; + + /* Disable processor serial number: */ + + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); } static int __init x86_serial_nr_setup(char *s) @@ -232,6 +237,7 @@ struct cpuid_dependent_feature { u32 feature; u32 level; }; + static const struct cpuid_dependent_feature __cpuinitconst cpuid_dependent_features[] = { { X86_FEATURE_MWAIT, 0x00000005 }, @@ -245,6 +251,9 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) const struct cpuid_dependent_feature *df; for (df = cpuid_dependent_features; df->feature; df++) { + + if (!cpu_has(c, df->feature)) + continue; /* * Note: cpuid_level is set to -1 if unavailable, but * extended_extended_level is set to 0 if unavailable @@ -252,26 +261,26 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) * when signed; hence the weird messing around with * signs here... */ - if (cpu_has(c, df->feature) && - ((s32)df->level < 0 ? + if (!((s32)df->level < 0 ? (u32)df->level > (u32)c->extended_cpuid_level : - (s32)df->level > (s32)c->cpuid_level)) { - clear_cpu_cap(c, df->feature); - if (warn) - printk(KERN_WARNING - "CPU: CPU feature %s disabled " - "due to lack of CPUID level 0x%x\n", - x86_cap_flags[df->feature], - df->level); - } + (s32)df->level > (s32)c->cpuid_level)) + continue; + + clear_cpu_cap(c, df->feature); + if (!warn) + continue; + + printk(KERN_WARNING + "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", + x86_cap_flags[df->feature], df->level); } } /* * Naming convention should be: [()] * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * + * in particular, if CPUID levels 0x80000002..4 are supported, this + * isn't used */ /* Look up CPU names by table lookup. */ @@ -308,8 +317,10 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ +/* + * Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. + */ void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; @@ -355,14 +366,16 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) if (c->extended_cpuid_level < 0x80000004) return; - v = (unsigned int *) c->x86_model_id; + v = (unsigned int *)c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ + /* + * Intel chips right-justify this string for some dumb reason; + * undo that brain damage: + */ p = q = &c->x86_model_id[0]; while (*p == ' ') p++; @@ -439,28 +452,30 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { + goto out; + } - if (smp_num_siblings > nr_cpu_ids) { - pr_warning("CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } + if (smp_num_siblings <= 1) + goto out; - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, - index_msb); + if (smp_num_siblings > nr_cpu_ids) { + pr_warning("CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } - smp_num_siblings = smp_num_siblings / c->x86_max_cores; + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); - index_msb = get_count_order(smp_num_siblings); + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - core_bits = get_count_order(c->x86_max_cores); + index_msb = get_count_order(smp_num_siblings); - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - } + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); out: if ((c->x86_max_cores * smp_num_siblings) > 1) { @@ -475,8 +490,8 @@ out: static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; - int i; static int printed; + int i; for (i = 0; i < X86_VENDOR_NUM; i++) { if (!cpu_devs[i]) @@ -485,6 +500,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!strcmp(v, cpu_devs[i]->c_ident[0]) || (cpu_devs[i]->c_ident[1] && !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; c->x86_vendor = this_cpu->c_x86_vendor; return; @@ -493,8 +509,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: vendor_id '%s'" - "unknown, using generic init.\n", v); + printk(KERN_ERR + "CPU: vendor_id '%s' unknown, using generic init.\n", v); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); } @@ -514,14 +531,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; c->x86_cache_alignment = c->x86_clflush_size; @@ -537,6 +557,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; @@ -545,6 +566,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { if (xlvl >= 0x80000001) { c->x86_capability[1] = cpuid_edx(0x80000001); @@ -762,8 +784,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) squash_the_stupid_serial_number(c); /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." + * The vendor-specific functions might have changed features. + * Now we do "generic changes." */ /* Filter out anything that depends on CPUID levels we don't have */ @@ -846,8 +868,8 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) } struct msr_range { - unsigned min; - unsigned max; + unsigned min; + unsigned max; }; static struct msr_range msr_range_array[] __cpuinitdata = { @@ -859,14 +881,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = { static void __cpuinit print_cpu_msr(void) { + unsigned index_min, index_max; unsigned index; u64 val; int i; - unsigned index_min, index_max; for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { index_min = msr_range_array[i].min; index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { if (rdmsrl_amd_safe(index, &val)) continue; @@ -876,6 +899,7 @@ static void __cpuinit print_cpu_msr(void) } static int show_msr __cpuinitdata; + static __init int setup_show_msr(char *arg) { int num; @@ -899,10 +923,12 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; - if (c->x86_vendor < X86_VENDOR_NUM) + if (c->x86_vendor < X86_VENDOR_NUM) { vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; + } else { + if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + } if (vendor && !strstr(c->x86_model_id, vendor)) printk(KERN_CONT "%s ", vendor); @@ -929,10 +955,12 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) static __init int setup_disablecpuid(char *arg) { int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) setup_clear_cpu_cap(bit); else return 0; + return 1; } __setup("clearcpuid=", setup_disablecpuid); @@ -942,6 +970,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); + DEFINE_PER_CPU(char *, irq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; @@ -951,6 +980,17 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(unsigned int, irq_count) = -1; +/* + * Special IST stacks which the CPU switches to when it calls + * an IST-marked descriptor entry. Up to 7 stacks (hardware + * limit), all of them are 4K, except the debug stack which + * is 8K. + */ +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ +}; + static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); @@ -984,7 +1024,7 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else /* x86_64 */ +#else /* CONFIG_X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, stack_canary); @@ -996,9 +1036,10 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; regs->gs = __KERNEL_STACK_CANARY; + return regs; } -#endif /* x86_64 */ +#endif /* CONFIG_X86_64 */ /* * Clear all 6 debug registers: @@ -1024,15 +1065,20 @@ static void clear_all_debug_regs(void) * A lot of state is already set up in PDA init for 64 bit */ #ifdef CONFIG_X86_64 + void __cpuinit cpu_init(void) { - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; + struct orig_ist *orig_ist; struct task_struct *me; + struct tss_struct *t; + unsigned long v; + int cpu; int i; + cpu = stack_smp_processor_id(); + t = &per_cpu(init_tss, cpu); + orig_ist = &per_cpu(orig_ist, cpu); + #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) @@ -1073,19 +1119,17 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ - }; char *estacks = per_cpu(exception_stacks, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { - estacks += sizes[v]; + estacks += exception_stack_sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* * <= is required because the CPU will access up to * 8 bits beyond the end of the IO permission bitmap. @@ -1187,5 +1231,4 @@ void __cpuinit cpu_init(void) xsave_init(); } - #endif -- cgit v1.2.3 From 78a8b35bc7abf8b8333d6f625e08c0f7cc1c3742 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 Mar 2009 22:36:01 -0700 Subject: x86: make e820_update_range() handle small range update Impact: enhance e820 code to handle more cases Try to handle new range which could be covered by one entry. Signed-off-by: Yinghai Lu Cc: jbeulich@novell.com LKML-Reference: <49B9F0C1.10402@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 95b81c18b6bc..0c34ff49ff4d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -427,6 +427,7 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, u64 size, unsigned old_type, unsigned new_type) { + u64 end; unsigned int i; u64 real_updated_size = 0; @@ -435,21 +436,35 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, if (size > (ULLONG_MAX - start)) size = ULLONG_MAX - start; + end = start + size; for (i = 0; i < e820x->nr_map; i++) { struct e820entry *ei = &e820x->map[i]; u64 final_start, final_end; + u64 ei_end; + if (ei->type != old_type) continue; - /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { + + ei_end = ei->addr + ei->size; + /* totally covered by new range? */ + if (ei->addr >= start && ei_end <= end) { ei->type = new_type; real_updated_size += ei->size; continue; } + + /* new range is totally covered? */ + if (ei->addr < start && ei_end > end) { + __e820_add_region(e820x, start, size, new_type); + __e820_add_region(e820x, end, ei_end - end, ei->type); + ei->size = start - ei->addr; + real_updated_size += size; + continue; + } + /* partially covered */ final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); + final_end = min(end, ei_end); if (final_start >= final_end) continue; -- cgit v1.2.3 From 63516ef6d6a8379ee5c32463efc6a75f9da8a309 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Mar 2009 12:46:07 -0700 Subject: x86: fix get_mtrr() warning about smp_processor_id() with CONFIG_PREEMPT=y Impact: fix debug warning Jaswinder noticed that there is a warning about smp_processor_id() in get_mtrr(). Fix it by wrapping the printout into a get/put_cpu() pair. Reported-by: Jaswinder Singh Rajput Signed-off-by: Yinghai Lu Cc: Andrew Morton LKML-Reference: <49BAB7FF.4030107@kernel.org> [ changed to get/put_cpu(), cleaned up surrounding code a it. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 950c434f793d..641ee3507d06 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -381,11 +381,14 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, { unsigned int mask_lo, mask_hi, base_lo, base_hi; unsigned int tmp, hi; + int cpu; /* * get_mtrr doesn't need to update mtrr_state, also it could be called * from any cpu, so try to print it out directly. */ + cpu = get_cpu(); + rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { @@ -393,15 +396,16 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, *base = 0; *size = 0; *type = 0; - return; + goto out_put_cpu; } rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); - /* Work out the shifted address mask. */ + /* Work out the shifted address mask: */ tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; mask_lo = size_or_mask | tmp; - /* Expand tmp with high bits to all 1s*/ + + /* Expand tmp with high bits to all 1s: */ hi = fls(tmp); if (hi > 0) { tmp |= ~((1<<(hi - 1)) - 1); @@ -412,15 +416,19 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, } } - /* This works correctly if size is a power of two, i.e. a - contiguous range. */ + /* + * This works correctly if size is a power of two, i.e. a + * contiguous range: + */ *size = -mask_lo; *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; *type = base_lo & 0xff; printk(KERN_DEBUG " get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n", - smp_processor_id(), reg, *base, *size, + cpu, reg, *base, *size, mtrr_attrib_to_str(*type & 0xff)); +out_put_cpu: + put_cpu(); } /** -- cgit v1.2.3 From d4c90e37a21154c1910b2646e9544bdd32d5bc3a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Mar 2009 14:08:49 -0700 Subject: x86: print the continous part of fixed mtrrs together Impact: print out fewer lines 1. print continuous range with same type together 2. change _INFO to _DEBUG Signed-off-by: Yinghai Lu LKML-Reference: <49BACB61.8000302@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 58 +++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 641ee3507d06..37f28fc7cf95 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -208,13 +208,47 @@ void mtrr_save_fixed_ranges(void *info) get_fixed_ranges(mtrr_state.fixed_ranges); } -static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) +static unsigned __initdata last_fixed_start; +static unsigned __initdata last_fixed_end; +static mtrr_type __initdata last_fixed_type; + +static void __init print_fixed_last(void) +{ + if (!last_fixed_end) + return; + + printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, + last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); + + last_fixed_end = 0; +} + +static void __init update_fixed_last(unsigned base, unsigned end, + mtrr_type type) +{ + last_fixed_start = base; + last_fixed_end = end; + last_fixed_type = type; +} + +static void __init print_fixed(unsigned base, unsigned step, + const mtrr_type *types) { unsigned i; - for (i = 0; i < 8; ++i, ++types, base += step) - printk(KERN_INFO " %05X-%05X %s\n", - base, base + step - 1, mtrr_attrib_to_str(*types)); + for (i = 0; i < 8; ++i, ++types, base += step) { + if (last_fixed_end == 0) { + update_fixed_last(base, base + step, *types); + continue; + } + if (last_fixed_end == base && last_fixed_type == *types) { + last_fixed_end = base + step; + continue; + } + /* new segments: gap or different type */ + print_fixed_last(); + update_fixed_last(base, base + step, *types); + } } static void prepare_set(void); @@ -225,22 +259,26 @@ static void __init print_mtrr_state(void) unsigned int i; int high_width; - printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); + printk(KERN_DEBUG "MTRR default type: %s\n", + mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { - printk(KERN_INFO "MTRR fixed ranges %sabled:\n", + printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", mtrr_state.enabled & 1 ? "en" : "dis"); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); for (i = 0; i < 8; ++i) print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + + /* tail */ + print_fixed_last(); } - printk(KERN_INFO "MTRR variable ranges %sabled:\n", + printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", mtrr_state.enabled & 2 ? "en" : "dis"); high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - printk(KERN_INFO " %u base %0*X%05X000 mask %0*X%05X000 %s\n", + printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", i, high_width, mtrr_state.var_ranges[i].base_hi, @@ -250,10 +288,10 @@ static void __init print_mtrr_state(void) mtrr_state.var_ranges[i].mask_lo >> 12, mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); else - printk(KERN_INFO " %u disabled\n", i); + printk(KERN_DEBUG " %u disabled\n", i); } if (mtrr_tom2) { - printk(KERN_INFO "TOM2: %016llx aka %lldM\n", + printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } } -- cgit v1.2.3 From 48f4c485c275e9550fa1a1191768689cc3ae0037 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 14 Mar 2009 12:24:02 +0100 Subject: x86/centaur: merge 32 & 64 bit version there should be no difference, except: * the 64bit variant now also initializes the padlock unit. * ->c_early_init() is executed again from ->c_init() * the 64bit fixups made into 32bit path. Signed-off-by: Sebastian Andrzej Siewior Cc: herbert@gondor.apana.org.au LKML-Reference: <1237029843-28076-2-git-send-email-sebastian@breakpoint.cc> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 17 +---------------- arch/x86/kernel/cpu/Makefile | 3 +-- arch/x86/kernel/cpu/centaur.c | 34 +++++++++++++++++++++++++++------- arch/x86/kernel/cpu/centaur_64.c | 37 ------------------------------------- 4 files changed, 29 insertions(+), 62 deletions(-) delete mode 100644 arch/x86/kernel/cpu/centaur_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index a95eaf0e582a..924e156a85ab 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -456,24 +456,9 @@ config CPU_SUP_AMD If unsure, say N. -config CPU_SUP_CENTAUR_32 +config CPU_SUP_CENTAUR default y bool "Support Centaur processors" if PROCESSOR_SELECT - depends on !64BIT - ---help--- - This enables detection, tunings and quirks for Centaur processors - - You need this enabled if you want your kernel to run on a - Centaur CPU. Disabling this option on other types of CPUs - makes the kernel a tiny bit smaller. Disabling it on a Centaur - CPU might render the kernel unbootable. - - If unsure, say N. - -config CPU_SUP_CENTAUR_64 - default y - bool "Support Centaur processors" if PROCESSOR_SELECT - depends on 64BIT ---help--- This enables detection, tunings and quirks for Centaur processors diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d4356f8b7522..4e242f9a06e4 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -19,8 +19,7 @@ obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o -obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o -obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o +obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 983e0830f0da..c95e831bb095 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,11 +1,11 @@ +#include #include #include -#include #include -#include #include #include +#include #include "cpu.h" @@ -276,7 +276,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) */ c->x86_capability[5] = cpuid_edx(0xC0000001); } - +#ifdef CONFIG_X86_32 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ if (c->x86_model >= 6 && c->x86_model <= 9) { rdmsr(MSR_VIA_FCR, lo, hi); @@ -288,6 +288,11 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) /* Before Nehemiah, the C3's had 3dNOW! */ if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); +#endif + if (c->x86 == 0x6 && c->x86_model >= 0xf) { + c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } display_cacheinfo(c); } @@ -316,16 +321,25 @@ enum { static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) { switch (c->x86) { +#ifdef CONFIG_X86_32 case 5: /* Emulate MTRRs using Centaur's MCR. */ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); break; +#endif + case 6: + if (c->x86_model >= 0xf) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + break; } +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#endif } static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { - +#ifdef CONFIG_X86_32 char *name; u32 fcr_set = 0; u32 fcr_clr = 0; @@ -337,8 +351,10 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); - +#endif + early_init_centaur(c); switch (c->x86) { +#ifdef CONFIG_X86_32 case 5: switch (c->x86_model) { case 4: @@ -442,16 +458,20 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) } sprintf(c->x86_model_id, "WinChip %s", name); break; - +#endif case 6: init_c3(c); break; } +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#endif } static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) { +#ifdef CONFIG_X86_32 /* VIA C3 CPUs (670-68F) need further shifting. */ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) size >>= 8; @@ -464,7 +484,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) if ((c->x86 == 6) && (c->x86_model == 9) && (c->x86_mask == 1) && (size == 65)) size -= 1; - +#endif return size; } diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c deleted file mode 100644 index 51b09c48c9c7..000000000000 --- a/arch/x86/kernel/cpu/centaur_64.c +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include - -#include -#include - -#include "cpu.h" - -static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x6 && c->x86_model >= 0xf) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -} - -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) -{ - early_init_centaur(c); - - if (c->x86 == 0x6 && c->x86_model >= 0xf) { - c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -} - -static const struct cpu_dev centaur_cpu_dev __cpuinitconst = { - .c_vendor = "Centaur", - .c_ident = { "CentaurHauls" }, - .c_early_init = early_init_centaur, - .c_init = init_centaur, - .c_x86_vendor = X86_VENDOR_CENTAUR, -}; - -cpu_dev_register(centaur_cpu_dev); - -- cgit v1.2.3 From f4c3c4cdb1de232ff37cf4339eb2f36c84e20da6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 13 Mar 2009 19:59:26 +0530 Subject: x86: cpu_debug add support for various AMD CPUs Impact: Added AMD CPUs support Added flags for various AMD CPUs. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_debug.h | 33 ++++++++- arch/x86/kernel/cpu/cpu_debug.c | 150 +++++++++++++++++++++++++++------------ 2 files changed, 136 insertions(+), 47 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index 56f1635e4617..222802029fa6 100755 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h @@ -33,6 +33,8 @@ enum cpu_debug_bit { CPU_VMX_BIT, /* VMX */ CPU_CALL_BIT, /* System Call */ CPU_BASE_BIT, /* BASE Address */ + CPU_VER_BIT, /* Version ID */ + CPU_CONF_BIT, /* Configuration */ CPU_SMM_BIT, /* System mgmt mode */ CPU_SVM_BIT, /*Secure Virtual Machine*/ CPU_OSVM_BIT, /* OS-Visible Workaround*/ @@ -69,6 +71,8 @@ enum cpu_debug_bit { #define CPU_VMX (1 << CPU_VMX_BIT) #define CPU_CALL (1 << CPU_CALL_BIT) #define CPU_BASE (1 << CPU_BASE_BIT) +#define CPU_VER (1 << CPU_VER_BIT) +#define CPU_CONF (1 << CPU_CONF_BIT) #define CPU_SMM (1 << CPU_SMM_BIT) #define CPU_SVM (1 << CPU_SVM_BIT) #define CPU_OSVM (1 << CPU_OSVM_BIT) @@ -123,10 +127,15 @@ enum cpu_processor_bit { CPU_INTEL_ATOM_BIT, CPU_INTEL_XEON_P4_BIT, CPU_INTEL_XEON_MP_BIT, +/* AMD */ + CPU_AMD_K6_BIT, + CPU_AMD_K7_BIT, + CPU_AMD_K8_BIT, + CPU_AMD_0F_BIT, + CPU_AMD_10_BIT, + CPU_AMD_11_BIT, }; -#define CPU_ALL (~0) /* Select all CPUs */ - #define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) #define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) #define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) @@ -156,9 +165,27 @@ enum cpu_processor_bit { #define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) #define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) -/* Select all Intel CPUs*/ +/* Select all supported Intel CPUs */ #define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) +#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT) +#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT) +#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT) +#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT) +#define CPU_AMD_10 (1 << CPU_AMD_10_BIT) +#define CPU_AMD_11 (1 << CPU_AMD_11_BIT) + +#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11) +#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS) +#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS) +#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS) + +/* Select all supported AMD CPUs */ +#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS) + +/* Select all supported CPUs */ +#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL) + #define MAX_CPU_FILES 512 struct cpu_private { diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 21c0cf8ced18..46e29ab96c6a 100755 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -64,6 +64,8 @@ static struct cpu_debug_base cpu_base[] = { { "vmx", CPU_VMX, 0 }, { "call", CPU_CALL, 0 }, { "base", CPU_BASE, 0 }, + { "ver", CPU_VER, 0 }, + { "conf", CPU_CONF, 0 }, { "smm", CPU_SMM, 0 }, { "svm", CPU_SVM, 0 }, { "osvm", CPU_OSVM, 0 }, @@ -177,54 +179,59 @@ static struct cpu_debug_range cpu_intel_range[] = { /* AMD Registers Range */ static struct cpu_debug_range cpu_amd_range[] = { - { 0x00000010, 0x00000010, CPU_TIME, CPU_ALL, }, - { 0x0000001B, 0x0000001B, CPU_APIC, CPU_ALL, }, - { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_ALL, }, - - { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_ALL, }, - { 0x00000179, 0x0000017A, CPU_MC, CPU_ALL, }, - { 0x0000017B, 0x0000017B, CPU_MC, CPU_ALL, }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_ALL, }, - { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_ALL, }, - - { 0x00000200, 0x0000020F, CPU_MTRR, CPU_ALL, }, - { 0x00000250, 0x00000250, CPU_MTRR, CPU_ALL, }, - { 0x00000258, 0x00000259, CPU_MTRR, CPU_ALL, }, - { 0x00000268, 0x0000026F, CPU_MTRR, CPU_ALL, }, - { 0x00000277, 0x00000277, CPU_PAT, CPU_ALL, }, - { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_ALL, }, - - { 0x00000400, 0x00000417, CPU_MC, CPU_ALL, }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_ALL, }, - { 0xC0000081, 0xC0000084, CPU_CALL, CPU_ALL, }, - { 0xC0000100, 0xC0000102, CPU_BASE, CPU_ALL, }, - { 0xC0000103, 0xC0000103, CPU_TIME, CPU_ALL, }, - - { 0xC0000408, 0xC000040A, CPU_MC, CPU_ALL, }, - - { 0xc0010000, 0xc0010007, CPU_PMC, CPU_ALL, }, - { 0xc0010010, 0xc0010010, CPU_MTRR, CPU_ALL, }, - { 0xc0010016, 0xc001001A, CPU_MTRR, CPU_ALL, }, - { 0xc001001D, 0xc001001D, CPU_MTRR, CPU_ALL, }, - { 0xc0010030, 0xc0010035, CPU_BIOS, CPU_ALL, }, - { 0xc0010056, 0xc0010056, CPU_SMM, CPU_ALL, }, - { 0xc0010061, 0xc0010063, CPU_SMM, CPU_ALL, }, - { 0xc0010074, 0xc0010074, CPU_MC, CPU_ALL, }, - { 0xc0010111, 0xc0010113, CPU_SMM, CPU_ALL, }, - { 0xc0010114, 0xc0010118, CPU_SVM, CPU_ALL, }, - { 0xc0010119, 0xc001011A, CPU_SMM, CPU_ALL, }, - { 0xc0010140, 0xc0010141, CPU_OSVM, CPU_ALL, }, - { 0xc0010156, 0xc0010156, CPU_SMM, CPU_ALL, }, + { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, }, + { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, }, + { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, }, + { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS }, + { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS }, + { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, }, + + { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, }, + { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, }, + { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, }, + + { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, }, + { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, }, + { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, }, + { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, }, + { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, }, + { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, }, + + { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, }, + { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, }, + { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, }, + { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, }, + + { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, }, + { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, }, + { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, }, + { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, }, + { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, }, + { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, }, + { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, }, + { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, }, + { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, }, + { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, }, + { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, }, + { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, }, + { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, }, + { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, }, + { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, }, + { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, }, + { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, }, + { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, }, }; -static int get_cpu_modelflag(unsigned cpu) +/* Intel */ +static int get_intel_modelflag(unsigned model) { int flag; - switch (per_cpu(cpu_model, cpu)) { - /* Intel */ + switch (model) { case 0x0501: case 0x0502: case 0x0504: @@ -271,6 +278,59 @@ static int get_cpu_modelflag(unsigned cpu) return flag; } +/* AMD */ +static int get_amd_modelflag(unsigned model) +{ + int flag; + + switch (model >> 8) { + case 0x6: + flag = CPU_AMD_K6; + break; + case 0x7: + flag = CPU_AMD_K7; + break; + case 0x8: + flag = CPU_AMD_K8; + break; + case 0xf: + flag = CPU_AMD_0F; + break; + case 0x10: + flag = CPU_AMD_10; + break; + case 0x11: + flag = CPU_AMD_11; + break; + default: + flag = CPU_NONE; + break; + } + + return flag; +} + +static int get_cpu_modelflag(unsigned cpu) +{ + int flag; + + flag = per_cpu(cpu_model, cpu); + + switch (flag >> 16) { + case X86_VENDOR_INTEL: + flag = get_intel_modelflag(flag); + break; + case X86_VENDOR_AMD: + flag = get_amd_modelflag(flag & 0xffff); + break; + default: + flag = CPU_NONE; + break; + } + + return flag; +} + static int get_cpu_range_count(unsigned cpu) { int index; @@ -311,7 +371,8 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag) return 1; break; case X86_VENDOR_AMD: - if (cpu_amd_range[i].flag & flag) + if ((cpu_amd_range[i].model & modelflag) && + (cpu_amd_range[i].flag & flag)) return 1; break; } @@ -337,7 +398,8 @@ static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, } break; case X86_VENDOR_AMD: - if (cpu_amd_range[index].flag & flag) { + if ((cpu_amd_range[index].model & modelflag) && + (cpu_amd_range[index].flag & flag)) { *min = cpu_amd_range[index].min; *max = cpu_amd_range[index].max; } -- cgit v1.2.3 From b9719a4d9cfa5d46fa6a1eb3e3fc2238e7981e4d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 10 Mar 2009 11:19:18 -0700 Subject: x86: make section delimiter symbols part of their section Impact: cleanup Move the symbols delimiting a section part of the section (section relative) rather than absolute. This avoids any unexpected gaps between the section-start symbol and the first data in the section, which could be caused by implicit alignment of the section data. It also makes the general form of vmlinux_64.lds.S consistent with vmlinux_32.lds.S. Signed-off-by: Jeremy Fitzhardinge Cc: Yinghai Lu Cc: "Eric W. Biederman" Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmlinux_64.lds.S | 85 +++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 5bf54e40c6ef..fe5d21ce7241 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -29,8 +29,8 @@ SECTIONS { . = __START_KERNEL; phys_startup_64 = startup_64 - LOAD_OFFSET; - _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + _text = .; /* Text and read-only data */ /* First the code that has to be first for bootstrapping */ *(.text.head) _stext = .; @@ -61,13 +61,13 @@ SECTIONS .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA CONSTRUCTORS + _edata = .; /* End of data section */ } :data - _edata = .; /* End of data section */ - . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); *(.data.cacheline_aligned) } . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); @@ -125,29 +125,29 @@ SECTIONS #undef VVIRT_OFFSET #undef VVIRT - . = ALIGN(THREAD_SIZE); /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + . = ALIGN(THREAD_SIZE); /* init_task */ *(.data.init_task) }:data.init - . = ALIGN(PAGE_SIZE); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); *(.data.page_aligned) } - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - __smp_alt_begin = .; - __smp_locks = .; .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + /* might get freed after init */ + . = ALIGN(PAGE_SIZE); + __smp_alt_begin = .; + __smp_locks = .; *(.smp_locks) + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + __smp_alt_end = .; } - __smp_locks_end = .; - . = ALIGN(PAGE_SIZE); - __smp_alt_end = .; . = ALIGN(PAGE_SIZE); /* Init code and data */ - __init_begin = .; + __init_begin = .; /* paired with __init_end */ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT @@ -159,40 +159,42 @@ SECTIONS __initdata_end = .; } - . = ALIGN(16); - __setup_start = .; - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } - __setup_end = .; - __initcall_start = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + } .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; INITCALLS + __initcall_end = .; } - __initcall_end = .; - __con_initcall_start = .; .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; *(.con_initcall.init) + __con_initcall_end = .; } - __con_initcall_end = .; - __x86_cpu_dev_start = .; .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; } - __x86_cpu_dev_end = .; SECURITY_INIT . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; + __parainstructions = .; *(.parainstructions) - __parainstructions_end = .; + __parainstructions_end = .; } - . = ALIGN(8); - __alt_instructions = .; .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + . = ALIGN(8); + __alt_instructions = .; *(.altinstructions) + __alt_instructions_end = .; } - __alt_instructions_end = .; .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { *(.altinstr_replacement) } @@ -207,9 +209,11 @@ SECTIONS #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(PAGE_SIZE); - __initramfs_start = .; - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } - __initramfs_end = .; + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } #endif #ifdef CONFIG_SMP @@ -229,20 +233,21 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - __bss_start = .; /* BSS */ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __bss_start = .; /* BSS */ *(.bss.page_aligned) *(.bss) - } - __bss_stop = .; + __bss_stop = .; + } _end = . ; -- cgit v1.2.3 From 93dbda7cbcd70a0bd1a99f39f44a9ccde8ab9040 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 26 Feb 2009 17:35:44 -0800 Subject: x86: add brk allocation for very, very early allocations Impact: new interface Add a brk()-like allocator which effectively extends the bss in order to allow very early code to do dynamic allocations. This is better than using statically allocated arrays for data in subsystems which may never get used. The space for brk allocations is in the bss ELF segment, so that the space is mapped properly by the code which maps the kernel, and so that bootloaders keep the space free rather than putting a ramdisk or something into it. The bss itself, delimited by __bss_stop, ends before the brk area (__brk_base to __brk_limit). The kernel text, data and bss is reserved up to __bss_stop. Any brk-allocated data is reserved separately just before the kernel pagetable is built, as that code allocates from unreserved spaces in the e820 map, potentially allocating from any unused brk memory. Ultimately any unused memory in the brk area is used in the general kernel memory pool. Initially the brk space is set to 1MB, which is probably much larger than any user needs (the largest current user is i386 head_32.S's code to build the pagetables to map the kernel, which can get fairly large with a big kernel image and no PSE support). So long as the system has sufficient memory for the bootloader to reserve the kernel+1MB brk, there are no bad effects resulting from an over-large brk. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sections.h | 7 +++++++ arch/x86/include/asm/setup.h | 4 ++++ arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/setup.c | 39 ++++++++++++++++++++++++++++++++++----- arch/x86/kernel/vmlinux_32.lds.S | 7 +++++++ arch/x86/kernel/vmlinux_64.lds.S | 5 +++++ arch/x86/mm/pageattr.c | 5 +++-- arch/x86/xen/mmu.c | 6 +++--- 9 files changed, 65 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 2b8c5160388f..1b7ee5d673c2 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -1 +1,8 @@ +#ifndef _ASM_X86_SECTIONS_H +#define _ASM_X86_SECTIONS_H + #include + +extern char __brk_base[], __brk_limit[]; + +#endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 05c6f6b11fd5..45454f3fa121 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,6 +100,10 @@ extern struct boot_params boot_params; */ #define LOWMEMSIZE() (0x9f000) +/* exceedingly early brk-like allocator */ +extern unsigned long _brk_end; +void *extend_brk(size_t size, size_t align); + #ifdef __i386__ void __init i386_start_kernel(void); diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index ac108d1fe182..29f1095b0849 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -18,7 +18,7 @@ void __init i386_start_kernel(void) { reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f5b272247690..70eaa852c732 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data) reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f28c56e6bf94..e6b742d2a3f5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,6 +114,9 @@ unsigned int boot_cpu_id __read_mostly; +static __initdata unsigned long _brk_start = (unsigned long)__brk_base; +unsigned long _brk_end = (unsigned long)__brk_base; + #ifdef CONFIG_X86_64 int default_cpu_present_to_apicid(int mps_cpu) { @@ -337,6 +340,34 @@ static void __init relocate_initrd(void) } #endif +void * __init extend_brk(size_t size, size_t align) +{ + size_t mask = align - 1; + void *ret; + + BUG_ON(_brk_start == 0); + BUG_ON(align & mask); + + _brk_end = (_brk_end + mask) & ~mask; + BUG_ON((char *)(_brk_end + size) > __brk_limit); + + ret = (void *)_brk_end; + _brk_end += size; + + memset(ret, 0, size); + + return ret; +} + +static void __init reserve_brk(void) +{ + if (_brk_end > _brk_start) + reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); + + /* Mark brk area as locked down and no longer taking any new allocations */ + _brk_start = 0; +} + static void __init reserve_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; @@ -717,11 +748,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; -#ifdef CONFIG_X86_32 - init_mm.brk = init_pg_tables_end + PAGE_OFFSET; -#else - init_mm.brk = (unsigned long) &_end; -#endif + init_mm.brk = _brk_end; code_resource.start = virt_to_phys(_text); code_resource.end = virt_to_phys(_etext)-1; @@ -842,6 +869,8 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif + reserve_brk(); + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< #include #include +#include #include #include #include @@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -711,7 +712,7 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the high * mapping already: */ - if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) + if (within(vaddr, (unsigned long) _text, _brk_end)) return 0; /* diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb6afa4ec95c..72f6a76dbfb9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, { pmd_t *kernel_pmd; - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + + xen_start_info->nr_pt_frames * PAGE_SIZE + + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v1.2.3 From 5368a2be34b96fda9c05d79424fa63a73ead04ed Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 14 Mar 2009 17:19:51 -0700 Subject: x86: move brk initialization out of #ifdef CONFIG_BLK_DEV_INITRD Impact: build fix The brk initialization functions were incorrectly located inside an #ifdef CONFIG_VLK_DEV_INITRD block, causing the obvious build failure in minimal configurations. Signed-off-by: H. Peter Anvin Cc: Jeremy Fitzhardinge --- arch/x86/kernel/setup.c | 57 +++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e6b742d2a3f5..b8329029c150 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -272,6 +272,35 @@ static inline void copy_edd(void) } #endif +void * __init extend_brk(size_t size, size_t align) +{ + size_t mask = align - 1; + void *ret; + + BUG_ON(_brk_start == 0); + BUG_ON(align & mask); + + _brk_end = (_brk_end + mask) & ~mask; + BUG_ON((char *)(_brk_end + size) > __brk_limit); + + ret = (void *)_brk_end; + _brk_end += size; + + memset(ret, 0, size); + + return ret; +} + +static void __init reserve_brk(void) +{ + if (_brk_end > _brk_start) + reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); + + /* Mark brk area as locked down and no longer taking any + new allocations */ + _brk_start = 0; +} + #ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_X86_32 @@ -340,34 +369,6 @@ static void __init relocate_initrd(void) } #endif -void * __init extend_brk(size_t size, size_t align) -{ - size_t mask = align - 1; - void *ret; - - BUG_ON(_brk_start == 0); - BUG_ON(align & mask); - - _brk_end = (_brk_end + mask) & ~mask; - BUG_ON((char *)(_brk_end + size) > __brk_limit); - - ret = (void *)_brk_end; - _brk_end += size; - - memset(ret, 0, size); - - return ret; -} - -static void __init reserve_brk(void) -{ - if (_brk_end > _brk_start) - reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); - - /* Mark brk area as locked down and no longer taking any new allocations */ - _brk_start = 0; -} - static void __init reserve_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; -- cgit v1.2.3 From ccf3fe02e35f4abca2589f99022cc25084bbd8ae Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:27:38 -0800 Subject: x86-32: use brk segment for allocating initial kernel pagetable Impact: use new interface instead of previous ad hoc implementation Rather than having special purpose init_pg_table_start/end variables to delimit the kernel pagetable built by head_32.S, just use the brk mechanism to extend the bss for the new pagetable. This patch removes init_pg_table_start/end and pg0, defines __brk_base (which is page-aligned and immediately follows _end), initializes the brk region to start there, and uses it for the 32-bit pagetable. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable_32.h | 3 --- arch/x86/include/asm/setup.h | 3 --- arch/x86/kernel/head32.c | 3 --- arch/x86/kernel/head_32.S | 14 +++++++------- arch/x86/kernel/setup.c | 6 ------ arch/x86/kernel/vmlinux_32.lds.S | 4 ---- arch/x86/lguest/boot.c | 8 -------- 7 files changed, 7 insertions(+), 34 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 97612fc7632f..31bd120cf2a2 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); */ #undef TEST_ACCESS_OK -/* The boot page tables (all created as a single array) */ -extern unsigned long pg0[]; - #ifdef CONFIG_X86_PAE # include #else diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 45454f3fa121..366d36639940 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -109,9 +109,6 @@ void *extend_brk(size_t size, size_t align); void __init i386_start_kernel(void); extern void probe_roms(void); -extern unsigned long init_pg_tables_start; -extern unsigned long init_pg_tables_end; - #else void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 29f1095b0849..3f8579f8d42c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,9 +29,6 @@ void __init i386_start_kernel(void) reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif - reserve_early(init_pg_tables_start, init_pg_tables_end, - "INIT_PG_TABLE"); - reserve_ebda_region(); /* diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c32ca19d591a..db6652710e98 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -167,7 +167,7 @@ num_subarch_entries = (. - subarch_entries) / 4 /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond _end. The variable - * init_pg_tables_end is set up to point to the first "safe" location. + * _brk_end is set up to point to the first "safe" location. * Mappings are created both at virtual address 0 (identity mapping) * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. * @@ -190,8 +190,7 @@ default_entry: xorl %ebx,%ebx /* %ebx is kept at zero */ - movl $pa(pg0), %edi - movl %edi, pa(init_pg_tables_start) + movl $pa(__brk_base), %edi movl $pa(swapper_pg_pmd), %edx movl $PTE_IDENT_ATTR, %eax 10: @@ -216,7 +215,8 @@ default_entry: cmpl %ebp,%eax jb 10b 1: - movl %edi,pa(init_pg_tables_end) + addl $__PAGE_OFFSET, %edi + movl %edi, pa(_brk_end) shrl $12, %eax movl %eax, pa(max_pfn_mapped) @@ -227,8 +227,7 @@ default_entry: page_pde_offset = (__PAGE_OFFSET >> 20); - movl $pa(pg0), %edi - movl %edi, pa(init_pg_tables_start) + movl $pa(__brk_base), %edi movl $pa(swapper_pg_dir), %edx movl $PTE_IDENT_ATTR, %eax 10: @@ -249,7 +248,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20); leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b - movl %edi,pa(init_pg_tables_end) + addl $__PAGE_OFFSET, %edi + movl %edi, pa(_brk_end) shrl $12, %eax movl %eax, pa(max_pfn_mapped) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b8329029c150..3ac2aa7b9eaf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -161,12 +161,6 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 -/* This value is set up by the early boot code to point to the value - immediately after the boot time page tables. It contains a *physical* - address, and must not be in the .bss segment! */ -unsigned long init_pg_tables_start __initdata = ~0UL; -unsigned long init_pg_tables_end __initdata = ~0UL; - static struct resource video_ram_resource = { .name = "Video RAM area", .start = 0xa0000, diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 27e44aa21585..1063fbe93c73 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -196,10 +196,6 @@ SECTIONS __brk_limit = . ; _end = . ; - - /* This is where the kernel creates the early boot page tables */ - . = ALIGN(PAGE_SIZE); - pg0 = . ; } /* Sections to be discarded */ diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9fe4ddaa8f6f..90e44a10e68a 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1058,14 +1058,6 @@ __init void lguest_init(void) * lguest_init() where the rest of the fairly chaotic boot setup * occurs. */ - /* The native boot code sets up initial page tables immediately after - * the kernel itself, and sets init_pg_tables_end so they're not - * clobbered. The Launcher places our initial pagetables somewhere at - * the top of our physical memory, so we don't need extra space: set - * init_pg_tables_end to the end of the kernel. */ - init_pg_tables_start = __pa(pg0); - init_pg_tables_end = __pa(pg0); - /* As described in head_32.S, we map the first 128M of memory. */ max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; -- cgit v1.2.3 From 6de6cb442e76bbaf2e685150be8ddac0f237a59c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:35:45 -0800 Subject: x86: use brk allocation for DMI Impact: use new interface instead of previous ad hoc implementation Use extend_brk() to allocate memory for DMI rather than having an ad-hoc allocator. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/dmi.h | 14 ++------------ arch/x86/kernel/setup.c | 6 ------ 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index bc68212c6bc0..aa32f7e6c197 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -2,21 +2,11 @@ #define _ASM_X86_DMI_H #include +#include -#define DMI_MAX_DATA 2048 - -extern int dmi_alloc_index; -extern char dmi_alloc_data[DMI_MAX_DATA]; - -/* This is so early that there is no good way to allocate dynamic memory. - Allocate data in an BSS array. */ static inline void *dmi_alloc(unsigned len) { - int idx = dmi_alloc_index; - if ((dmi_alloc_index + len) > DMI_MAX_DATA) - return NULL; - dmi_alloc_index += len; - return dmi_alloc_data + idx; + return extend_brk(len, sizeof(int)); } /* Use early IO mappings for DMI because it's initialized early */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3ac2aa7b9eaf..e894f36335f2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -215,12 +215,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE; /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; -/* - * Early DMI memory - */ -int dmi_alloc_index; -char dmi_alloc_data[DMI_MAX_DATA]; - /* * Setup options */ -- cgit v1.2.3 From 7543c1de84ed93c6769c9f20dced08a522af8912 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 Mar 2009 16:04:42 -0700 Subject: x86-32: compute initial mapping size more accurately Impact: simplification We only need to map the kernel in head_32.S, not the whole of lowmem. We use 512MB as a reasonable (but arbitrary) limit on the maximum size of the kernel image. Signed-off-by: Yinghai Lu Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/page_32_types.h | 5 +++++ arch/x86/kernel/head_32.S | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f1e4a79a6e41..0f915ae649a7 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -39,6 +39,11 @@ #define __VIRTUAL_MASK_SHIFT 32 #endif /* CONFIG_X86_PAE */ +/* + * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) + */ +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) + #ifndef __ASSEMBLY__ /* diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index db6652710e98..3ce5456dfbe6 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -54,7 +54,7 @@ * * This should be a multiple of a page. */ -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) +LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT /* * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate -- cgit v1.2.3 From 796216a57fe45c04adc35bda1f0782efec78a713 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Mar 2009 16:09:49 -0700 Subject: x86: allow extend_brk users to reserve brk space Impact: new interface; remove hard-coded limit Add RESERVE_BRK(name, size) macro to reserve space in the brk area. This should be a conservative (ie, larger) estimate of how much space might possibly be required from the brk area. Any unused space will be freed, so there's no real downside on making the reservation too large (within limits). The name should be unique within a given file, and somewhat descriptive. The C definition of RESERVE_BRK() ends up being more complex than one would expect to work around a cluster of gcc infelicities: The first attempt was to simply try putting __section(.brk_reservation) on a variable. This doesn't work because it ends up making it a @progbits section, which gets actual space allocated in the vmlinux executable. The second attempt was to emit the space into a section using asm, but gcc doesn't allow arguments to be passed to file-level asm() statements, making it hard to pass in the size. The final attempt is to wrap the asm() in a function to allow it to have arguments, and put the function itself into the .discard section, which vmlinux*.lds drops entirely from the emitted vmlinux. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/setup.h | 30 ++++++++++++++++++++++++++++++ arch/x86/kernel/head_32.S | 2 ++ arch/x86/kernel/setup.c | 2 ++ arch/x86/kernel/vmlinux_32.lds.S | 4 +++- arch/x86/kernel/vmlinux_64.lds.S | 4 +++- 5 files changed, 40 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 366d36639940..61b126b97885 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -104,6 +104,29 @@ extern struct boot_params boot_params; extern unsigned long _brk_end; void *extend_brk(size_t size, size_t align); +/* + * Reserve space in the brk section. The name must be unique within + * the file, and somewhat descriptive. The size is in bytes. Must be + * used at file scope. + * + * (This uses a temp function to wrap the asm so we can pass it the + * size parameter; otherwise we wouldn't be able to. We can't use a + * "section" attribute on a normal variable because it always ends up + * being @progbits, which ends up allocating space in the vmlinux + * executable.) + */ +#define RESERVE_BRK(name,sz) \ + static void __section(.discard) __used \ + __brk_reservation_fn_##name##__(void) { \ + asm volatile ( \ + ".pushsection .brk_reservation,\"aw\",@nobits;" \ + "__brk_reservation_" #name "__:" \ + " 1:.skip %c0;" \ + " .size __brk_reservation_" #name "__, . - 1b;" \ + " .popsection" \ + : : "i" (sz)); \ + } + #ifdef __i386__ void __init i386_start_kernel(void); @@ -115,6 +138,13 @@ void __init x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ +#else +#define RESERVE_BRK(name,sz) \ + .pushsection .brk_reservation,"aw",@nobits; \ +__brk_reservation_##name##__: \ +1: .skip sz; \ + .size __brk_reservation_##name##__,.-1b; \ + .popsection #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 3ce5456dfbe6..9e89f2a14b90 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -75,6 +75,8 @@ ALLOCATOR_SLOP = 4 INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm +RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE) + /* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, * %esi points to the real-mode code as a 32-bit pointer. diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e894f36335f2..a0d26237d7cf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,8 @@ #define ARCH_SETUP #endif +RESERVE_BRK(dmi_alloc, 65536); + unsigned int boot_cpu_id __read_mostly; static __initdata unsigned long _brk_start = (unsigned long)__brk_base; diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 1063fbe93c73..a1f28b85fb34 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -192,7 +192,8 @@ SECTIONS . = ALIGN(PAGE_SIZE); __brk_base = . ; - . += 1024 * 1024 ; + . += 64 * 1024 ; /* 64k slop space */ + *(.brk_reservation) /* areas brk users have reserved */ __brk_limit = . ; _end = . ; @@ -201,6 +202,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index ff373423138c..7996687663a2 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -250,7 +250,8 @@ SECTIONS . = ALIGN(PAGE_SIZE); __brk_base = . ; - . += 1024 * 1024 ; + . += 64 * 1024; /* 64k slop space */ + *(.brk_reservation) /* areas brk users have reserved */ __brk_limit = . ; } @@ -260,6 +261,7 @@ SECTIONS /DISCARD/ : { *(.exitcall.exit) *(.eh_frame) + *(.discard) } STABS_DEBUG -- cgit v1.2.3 From 2bd2753ff46346543ab92e80df9d96366e21baa5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 9 Mar 2009 01:15:57 -0700 Subject: x86: put initial_pg_tables into .bss Impact: makes vmlinux section information more useful Don't use ram after _end blindly for pagetables. aka init pages is before _end put those pg table into .bss [Adapted to use brk segment - Jeremy] v2: keep initial page table up to 512M only. v4: put initial page tables just before _end Signed-off-by: Yinghai Lu Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 43 +++++++++++++--------------------------- arch/x86/kernel/vmlinux_32.lds.S | 6 ++++++ 2 files changed, 20 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9e89f2a14b90..c79741cfb078 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -41,41 +41,28 @@ * This is how much memory *in addition to the memory covered up to * and including _end* we need mapped initially. * We need: - * - one bit for each possible page, but only in low memory, which means - * 2^32/4096/8 = 128K worst case (4G/4G split.) - * - enough space to map all low memory, which means - * (2^32/4096) / 1024 pages (worst case, non PAE) - * (2^32/4096) / 512 + 4 pages (worst case for PAE) - * - a few pages for allocator use before the kernel pagetable has - * been set up + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) * * Modulo rounding, each megabyte assigned here requires a kilobyte of * memory, which is currently unreclaimed. * * This should be a multiple of a page. + * + * KERNEL_IMAGE_SIZE should be greater than pa(_end) + * and small than max_low_pfn, otherwise will waste some page table entries */ LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT -/* - * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate - * pagetables from above the 16MB DMA limit, so we'll have to set - * up pagetables 16MB more (worst-case): - */ -#ifdef CONFIG_DEBUG_PAGEALLOC -LOW_PAGES = LOW_PAGES + 0x1000000 -#endif - #if PTRS_PER_PMD > 1 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD #else PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) #endif -BOOTBITMAP_SIZE = LOW_PAGES / 8 ALLOCATOR_SLOP = 4 -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm - -RESERVE_BRK(pagetables, PAGE_TABLE_SIZE * PAGE_SIZE) +INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm +RESERVE_BRK(pagetables, INIT_MAP_SIZE) /* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, @@ -168,10 +155,10 @@ num_subarch_entries = (. - subarch_entries) / 4 /* * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond _end. The variable + * tables, which are located immediately beyond __brk_base. The variable * _brk_end is set up to point to the first "safe" location. * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. + * and PAGE_OFFSET for up to _end. * * Note that the stack is not yet set up! */ @@ -210,10 +197,9 @@ default_entry: loop 11b /* - * End condition: we must map up to and including INIT_MAP_BEYOND_END - * bytes beyond the end of our own page tables. + * End condition: we must map up to the end. */ - leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp + movl $pa(_end) + PTE_IDENT_ATTR, %ebp cmpl %ebp,%eax jb 10b 1: @@ -243,11 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); addl $0x1000,%eax loop 11b /* - * End condition: we must map up to and including INIT_MAP_BEYOND_END - * bytes beyond the end of our own page tables; the +0x007 is - * the attribute bits + * End condition: we must map up to end */ - leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp + movl $pa(_end) + PTE_IDENT_ATTR, %ebp cmpl %ebp,%eax jb 10b addl $__PAGE_OFFSET, %edi @@ -638,6 +622,7 @@ swapper_pg_fixmap: .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 + /* * This starts the data section. */ diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a1f28b85fb34..98424f33e077 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -210,6 +210,12 @@ SECTIONS DWARF_DEBUG } +/* + * Build-time check on the image size: + */ +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") + #ifdef CONFIG_KEXEC /* Link time checks */ #include -- cgit v1.2.3 From 6d7942dc2a70a7e74c352107b150265602671588 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Mar 2009 14:32:41 -0700 Subject: x86: fix 64k corruption-check Impact: fix boot crash Need to exit early if the addr is far above 64k. The crash got exposed by: 78a8b35: x86: make e820_update_range() handle small range update Signed-off-by: Yinghai Lu Cc: LKML-Reference: <49BC2279.2030101@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/check.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index b617b1164f1e..fc999e6fc46a 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -86,12 +86,12 @@ void __init setup_bios_corruption_check(void) if (!(addr + 1)) break; + if (addr >= corruption_check_size) + break; + if ((addr + size) > corruption_check_size) size = corruption_check_size - addr; - if (size == 0) - break; - e820_update_range(addr, size, E820_RAM, E820_RESERVED); scan_areas[num_scan_areas].addr = addr; scan_areas[num_scan_areas].size = size; -- cgit v1.2.3 From c61cf4cfe7c73c7aa62dde3ff82cd475b9c41481 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Mar 2009 00:59:19 -0700 Subject: x86: print out more info in e820_update_range() Impact: help debug e820 bugs Try to print out more info, to catch wrong call parameters. Signed-off-by: Yinghai Lu LKML-Reference: <49BCB557.3030000@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 56 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0c34ff49ff4d..fb638d9ce6d2 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -131,6 +131,31 @@ void __init e820_add_region(u64 start, u64 size, int type) __e820_add_region(&e820, start, size, type); } +static void __init e820_print_type(u32 type) +{ + switch (type) { + case E820_RAM: + case E820_RESERVED_KERN: + printk(KERN_CONT "(usable)"); + break; + case E820_RESERVED: + printk(KERN_CONT "(reserved)"); + break; + case E820_ACPI: + printk(KERN_CONT "(ACPI data)"); + break; + case E820_NVS: + printk(KERN_CONT "(ACPI NVS)"); + break; + case E820_UNUSABLE: + printk(KERN_CONT "(unusable)"); + break; + default: + printk(KERN_CONT "type %u", type); + break; + } +} + void __init e820_print_map(char *who) { int i; @@ -140,27 +165,8 @@ void __init e820_print_map(char *who) (unsigned long long) e820.map[i].addr, (unsigned long long) (e820.map[i].addr + e820.map[i].size)); - switch (e820.map[i].type) { - case E820_RAM: - case E820_RESERVED_KERN: - printk(KERN_CONT "(usable)\n"); - break; - case E820_RESERVED: - printk(KERN_CONT "(reserved)\n"); - break; - case E820_ACPI: - printk(KERN_CONT "(ACPI data)\n"); - break; - case E820_NVS: - printk(KERN_CONT "(ACPI NVS)\n"); - break; - case E820_UNUSABLE: - printk("(unusable)\n"); - break; - default: - printk(KERN_CONT "type %u\n", e820.map[i].type); - break; - } + e820_print_type(e820.map[i].type); + printk(KERN_CONT "\n"); } } @@ -437,6 +443,14 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, size = ULLONG_MAX - start; end = start + size; + printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", + (unsigned long long) start, + (unsigned long long) end); + e820_print_type(old_type); + printk(KERN_CONT " ==> "); + e820_print_type(new_type); + printk(KERN_CONT "\n"); + for (i = 0; i < e820x->nr_map; i++) { struct e820entry *ei = &e820x->map[i]; u64 final_start, final_end; -- cgit v1.2.3 From 514ec49a5f5146a6c0ade1a688787acf13617868 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 16 Mar 2009 17:07:33 +0900 Subject: x86, mce: remove incorrect __cpuinit for intel_init_cmci() Impact: Bug fix on UP Referring commit cc3ca22063784076bd240fda87217387a8f2ae92, Peter removed __cpuinit annotations for mce_cpu_features() and its successor functions, which caused troubles on UP configurations. However the intel_init_cmci() was introduced after that and it also has __cpuinit annotation even though it is called from mce_cpu_features(). Remove the annotation from that function too. Signed-off-by: Hidetoshi Seto Cc: H. Peter Anvin Cc: Andi Kleen Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index aaa7d9730938..57df3d383470 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -270,7 +270,7 @@ void cmci_reenable(void) cmci_discover(banks, 0); } -static __cpuinit void intel_init_cmci(void) +static void intel_init_cmci(void) { int banks; -- cgit v1.2.3 From 250981e6e1ef04947eccaa55e8c25ddcaa47a02e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Mar 2009 13:07:21 +0100 Subject: x86: reduce preemption off section in exit thread Impact: latency improvement No need to keep preemption disabled over the kfree call. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6afa5232dbb7..156f87582c6c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -65,11 +65,11 @@ void exit_thread(void) { struct task_struct *me = current; struct thread_struct *t = &me->thread; + unsigned long *bp = t->io_bitmap_ptr; - if (me->thread.io_bitmap_ptr) { + if (bp) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); - kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); /* @@ -78,6 +78,7 @@ void exit_thread(void) memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); + kfree(bp); } ds_exit_thread(current); -- cgit v1.2.3 From f0348c438c9ea156194d31fadde4a9e75522196f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Mar 2009 16:33:59 -0700 Subject: x86: MTRR workaround for system with stange var MTRRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: don't trim e820 according to wrong mtrr Ozan reports that his server emits strange warning. it turns out the BIOS sets the MTRRs incorrectly. Ignore those strange ranges, and don't trim e820, just emit one warning about BIOS Reported-by: Ozan ÇaÄŸlayan Signed-off-by: Yinghai Lu LKML-Reference: <49BEE1E7.7020706@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 11 +++++++++++ arch/x86/kernel/cpu/mtrr/main.c | 16 ++++++++-------- arch/x86/kernel/cpu/mtrr/mtrr.h | 1 + 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 58b58bbf7eb3..f4f89fbc228f 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -189,6 +189,17 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, if (!size) continue; base = range_state[i].base_pfn; + if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && + (mtrr_state.enabled & 1)) { + /* Var MTRR contains UC entry below 1M? Skip it: */ + printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " + "contains strange UC entry under 1M, check " + "with your system vendor!\n", i); + if (base + size <= (1<<(20-PAGE_SHIFT))) + continue; + size -= (1<<(20-PAGE_SHIFT)) - base; + base = 1<<(20-PAGE_SHIFT); + } subtract_range(range, base, base + size - 1); } if (extra_remove_size) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5c2e266f41d7..03cda01f57c7 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -574,7 +574,7 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; +static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { @@ -582,9 +582,9 @@ static int mtrr_save(struct sys_device * sysdev, pm_message_t state) for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, - &mtrr_state[i].lbase, - &mtrr_state[i].lsize, - &mtrr_state[i].ltype); + &mtrr_value[i].lbase, + &mtrr_value[i].lsize, + &mtrr_value[i].ltype); } return 0; } @@ -594,11 +594,11 @@ static int mtrr_restore(struct sys_device * sysdev) int i; for (i = 0; i < num_var_ranges; i++) { - if (mtrr_state[i].lsize) + if (mtrr_value[i].lsize) set_mtrr(i, - mtrr_state[i].lbase, - mtrr_state[i].lsize, - mtrr_state[i].ltype); + mtrr_value[i].lbase, + mtrr_value[i].lsize, + mtrr_value[i].ltype); } return 0; } diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 6710e93021a7..77f67f7b347a 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -79,6 +79,7 @@ extern struct mtrr_ops * mtrr_if; extern unsigned int num_var_ranges; extern u64 mtrr_tom2; +extern struct mtrr_state_type mtrr_state; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); -- cgit v1.2.3 From 2118d0c548e8a2205e1a29eb5b89e5f2e9ae2c8b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 15:13:15 +0100 Subject: dma-debug: x86 architecture bindings Impact: make use of DMA-API debugging code in x86 Signed-off-by: Joerg Roedel --- arch/x86/Kconfig | 1 + arch/x86/include/asm/dma-mapping.h | 45 +++++++++++++++++++++++++++++++++----- arch/x86/kernel/pci-dma.c | 6 +++++ 3 files changed, 46 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc2fbadff9f9..f2cb677b263f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -40,6 +40,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select USER_STACKTRACE_SUPPORT + select HAVE_DMA_API_DEBUG config ARCH_DEFCONFIG string diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 9c78bd40ebec..cea7b74963e9 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -56,11 +57,16 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size, enum dma_data_direction dir) { struct dma_map_ops *ops = get_dma_ops(hwdev); + dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); - return ops->map_page(hwdev, virt_to_page(ptr), + addr = ops->map_page(hwdev, virt_to_page(ptr), (unsigned long)ptr & ~PAGE_MASK, size, dir, NULL); + debug_dma_map_page(hwdev, virt_to_page(ptr), + (unsigned long)ptr & ~PAGE_MASK, size, + dir, addr, true); + return addr; } static inline void @@ -72,6 +78,7 @@ dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) ops->unmap_page(dev, addr, size, dir, NULL); + debug_dma_unmap_page(dev, addr, size, dir, true); } static inline int @@ -79,9 +86,13 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { struct dma_map_ops *ops = get_dma_ops(hwdev); + int ents; BUG_ON(!valid_dma_direction(dir)); - return ops->map_sg(hwdev, sg, nents, dir, NULL); + ents = ops->map_sg(hwdev, sg, nents, dir, NULL); + debug_dma_map_sg(hwdev, sg, nents, ents, dir); + + return ents; } static inline void @@ -91,6 +102,7 @@ dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, struct dma_map_ops *ops = get_dma_ops(hwdev); BUG_ON(!valid_dma_direction(dir)); + debug_dma_unmap_sg(hwdev, sg, nents, dir); if (ops->unmap_sg) ops->unmap_sg(hwdev, sg, nents, dir, NULL); } @@ -104,6 +116,7 @@ dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(hwdev, dma_handle, size, dir); + debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir); flush_write_buffers(); } @@ -116,6 +129,7 @@ dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_device) ops->sync_single_for_device(hwdev, dma_handle, size, dir); + debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir); flush_write_buffers(); } @@ -130,6 +144,8 @@ dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, if (ops->sync_single_range_for_cpu) ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, dir); + debug_dma_sync_single_range_for_cpu(hwdev, dma_handle, + offset, size, dir); flush_write_buffers(); } @@ -144,6 +160,8 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, if (ops->sync_single_range_for_device) ops->sync_single_range_for_device(hwdev, dma_handle, offset, size, dir); + debug_dma_sync_single_range_for_device(hwdev, dma_handle, + offset, size, dir); flush_write_buffers(); } @@ -156,6 +174,7 @@ dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(hwdev, sg, nelems, dir); + debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir); flush_write_buffers(); } @@ -168,6 +187,7 @@ dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_device) ops->sync_sg_for_device(hwdev, sg, nelems, dir); + debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir); flush_write_buffers(); } @@ -177,15 +197,24 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, enum dma_data_direction dir) { struct dma_map_ops *ops = get_dma_ops(dev); + dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); - return ops->map_page(dev, page, offset, size, dir, NULL); + addr = ops->map_page(dev, page, offset, size, dir, NULL); + debug_dma_map_page(dev, page, offset, size, dir, addr, false); + + return addr; } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - dma_unmap_single(dev, addr, size, dir); + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->unmap_page) + ops->unmap_page(dev, addr, size, dir, NULL); + debug_dma_unmap_page(dev, addr, size, dir, false); } static inline void @@ -250,8 +279,11 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!ops->alloc_coherent) return NULL; - return ops->alloc_coherent(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp)); + memory = ops->alloc_coherent(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp)); + debug_dma_alloc_coherent(dev, size, *dma_handle, memory); + + return memory; } static inline void dma_free_coherent(struct device *dev, size_t size, @@ -264,6 +296,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, if (dma_release_from_coherent(dev, get_order(size), vaddr)) return; + debug_dma_free_coherent(dev, size, vaddr, bus); if (ops->free_coherent) ops->free_coherent(dev, size, vaddr, bus); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f293a8df6828..ebf7d454f210 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -44,6 +45,9 @@ struct device x86_dma_fallback_dev = { }; EXPORT_SYMBOL(x86_dma_fallback_dev); +/* Number of entries preallocated for DMA-API debugging */ +#define PREALLOC_DMA_DEBUG_ENTRIES 32768 + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) @@ -265,6 +269,8 @@ EXPORT_SYMBOL(dma_supported); static int __init pci_iommu_init(void) { + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + calgary_iommu_init(); intel_iommu_init(); -- cgit v1.2.3 From 86f319529372953e353dc998bc6a761949614903 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 16 Mar 2009 17:50:28 +0100 Subject: dma-debug/x86: register pci bus for dma-debug leak detection Impact: detect dma memory leaks for pci devices Signed-off-by: Joerg Roedel --- arch/x86/kernel/pci-dma.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index ebf7d454f210..c7c4776ff630 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -271,6 +271,10 @@ static int __init pci_iommu_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); +#ifdef CONFIG_PCI + dma_debug_add_bus(&pci_bus_type); +#endif + calgary_iommu_init(); intel_iommu_init(); -- cgit v1.2.3 From a6a80e1d8cf82b46a69f88e659da02749231eb36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 17 Mar 2009 07:58:26 -0700 Subject: Fix potential fast PIT TSC calibration startup glitch During bootup, when we reprogram the PIT (programmable interval timer) to start counting down from 0xffff in order to use it for the fast TSC calibration, we should also make sure to delay a bit afterwards to allow the PIT hardware to actually start counting with the new value. That will happens at the next CLK pulse (1.193182 MHz), so the easiest way to do that is to just wait at least one microsecond after programming the new PIT counter value. We do that by just reading the counter value back once - which will take about 2us on PC hardware. Reported-and-tested-by: john stultz Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e58168631..9e80207c96a2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -315,6 +315,15 @@ static unsigned long quick_pit_calibrate(void) outb(0xff, 0x42); outb(0xff, 0x42); + /* + * The PIT starts counting at the next edge, so we + * need to delay for a microsecond. The easiest way + * to do that is to just read back the 16-bit counter + * once from the PIT. + */ + inb(0x42); + inb(0x42); + if (pit_expect_msb(0xff)) { int i; u64 t1, t2, delta; -- cgit v1.2.3 From 9e8912e04e612b43897b4b722205408b92f423e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 17 Mar 2009 08:13:17 -0700 Subject: Fast TSC calibration: calculate proper frequency error bounds In order for ntpd to correctly synchronize the clocks, the frequency of the system clock must not be off by more than 500 ppm (or, put another way, 1:2000), or ntpd will end up giving up on trying to synchronize properly, and ends up reseting the clock in jumps instead. The fast TSC PIT calibration sometimes failed this test - it was assuming that the PIT reads always took about one microsecond each (2us for the two reads to get a 16-bit timer), and that calibrating TSC to the PIT over 15ms should thus be sufficient to get much closer than 500ppm (max 2us error on both sides giving 4us over 15ms: a 270 ppm error value). However, that assumption does not always hold: apparently some hardware is either very much slower at reading the PIT registers, or there was other noise causing at least one machine to get 700+ ppm errors. So instead of using a fixed 15ms timing loop, this changes the fast PIT calibration to read the TSC delta over the individual PIT timer reads, and use the result to calculate the error bars on the PIT read timing properly. We then successfully calibrate the TSC only if the maximum error bars fall below 500ppm. In the process, we also relax the timing to allow up to 25ms for the calibration, although it can happen much faster depending on hardware. Reported-and-tested-by: Jesper Krogh Cc: john stultz Cc: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 101 ++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9e80207c96a2..d5cebb52d45b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -273,30 +273,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequencty. */ -static inline int pit_expect_msb(unsigned char val) +static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { - int count = 0; + int count; + u64 tsc = 0; for (count = 0; count < 50000; count++) { /* Ignore LSB */ inb(0x42); if (inb(0x42) != val) break; + tsc = get_cycles(); } - return count > 50; + *deltap = get_cycles() - tsc; + *tscp = tsc; + + /* + * We require _some_ success, but the quality control + * will be based on the error terms on the TSC values. + */ + return count > 5; } /* - * How many MSB values do we want to see? We aim for a - * 15ms calibration, which assuming a 2us counter read - * error should give us roughly 150 ppm precision for - * the calibration. + * How many MSB values do we want to see? We aim for + * a maximum error rate of 500ppm (in practice the + * real error is much smaller), but refuse to spend + * more than 25ms on it. */ -#define QUICK_PIT_MS 15 -#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) +#define MAX_QUICK_PIT_MS 25 +#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) static unsigned long quick_pit_calibrate(void) { + int i; + u64 tsc, delta; + unsigned long d1, d2; + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); @@ -324,45 +337,43 @@ static unsigned long quick_pit_calibrate(void) inb(0x42); inb(0x42); - if (pit_expect_msb(0xff)) { - int i; - u64 t1, t2, delta; - unsigned char expect = 0xfe; - - t1 = get_cycles(); - for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { - if (!pit_expect_msb(expect)) - goto failed; + if (pit_expect_msb(0xff, &tsc, &d1)) { + for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { + if (!pit_expect_msb(0xff-i, &delta, &d2)) + break; + + /* + * Iterate until the error is less than 500 ppm + */ + delta -= tsc; + if (d1+d2 < delta >> 11) + goto success; } - t2 = get_cycles(); - - /* - * Make sure we can rely on the second TSC timestamp: - */ - if (!pit_expect_msb(expect)) - goto failed; - - /* - * Ok, if we get here, then we've seen the - * MSB of the PIT decrement QUICK_PIT_ITERATIONS - * times, and each MSB had many hits, so we never - * had any sudden jumps. - * - * As a result, we can depend on there not being - * any odd delays anywhere, and the TSC reads are - * reliable. - * - * kHz = ticks / time-in-seconds / 1000; - * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 - * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) - */ - delta = (t2 - t1)*PIT_TICK_RATE; - do_div(delta, QUICK_PIT_ITERATIONS*256*1000); - printk("Fast TSC calibration using PIT\n"); - return delta; } -failed: + printk("Fast TSC calibration failed\n"); return 0; + +success: + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement 'i' times, and the + * error has shrunk to less than 500 ppm. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable (within the error). We also adjust the + * delta to the middle of the error bars, just + * because it looks nicer. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) + */ + delta += (long)(d2 - d1)/2; + delta *= PIT_TICK_RATE; + do_div(delta, i*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; } /** -- cgit v1.2.3 From 30390880debce4a68fd23e87a787f27609e4bf4a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Mar 2009 18:57:22 -0400 Subject: prevent boosting kprobes on exception address Don't boost at the addresses which are listed on exception tables, because major page fault will occur on those addresses. In that case, kprobes can not ensure that when instruction buffer can be freed since some processes will sleep on the buffer. kprobes-ia64 already has same check. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Signed-off-by: Linus Torvalds --- arch/x86/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e948b28a5a9a..4558dd3918cf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes) kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; + if (search_exception_tables(opcodes)) + return 0; /* Page fault may occur on this address. */ + retry: if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) return 0; -- cgit v1.2.3 From c090f532db3ab5b7be503f8ac84b0d33a18646c6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Mar 2009 12:07:54 -0700 Subject: x86-32: make sure we map enough to fit linear map pagetables Impact: crash fix head_32.S needs to map the kernel itself, and enough space so that mm/init.c can allocate space from the e820 allocator for the linear map of low memory. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c79741cfb078..d383a7c0e49f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -38,8 +38,8 @@ #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id /* - * This is how much memory *in addition to the memory covered up to - * and including _end* we need mapped initially. + * This is how much memory in addition to the memory covered up to + * and including _end we need mapped initially. * We need: * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) @@ -52,16 +52,25 @@ * KERNEL_IMAGE_SIZE should be greater than pa(_end) * and small than max_low_pfn, otherwise will waste some page table entries */ -LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT #if PTRS_PER_PMD > 1 -PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) #else -PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #endif ALLOCATOR_SLOP = 4 -INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm +/* Enough space to fit pagetables for the low memory linear map */ +MAPPING_BEYOND_END = (PAGE_TABLE_SIZE(1 << (32 - PAGE_SHIFT)) * PAGE_SIZE) + +/* + * Worst-case size of the kernel mapping we need to make: + * the worst-case size of the kernel itself, plus the extra we need + * to map for the linear map. + */ +KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT + +INIT_MAP_SIZE = (PAGE_TABLE_SIZE(KERNEL_PAGES) + ALLOCATOR_SLOP) * PAGE_SIZE_asm RESERVE_BRK(pagetables, INIT_MAP_SIZE) /* @@ -197,9 +206,9 @@ default_entry: loop 11b /* - * End condition: we must map up to the end. + * End condition: we must map up to the end + MAPPING_BEYOND_END. */ - movl $pa(_end) + PTE_IDENT_ATTR, %ebp + movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp cmpl %ebp,%eax jb 10b 1: @@ -229,9 +238,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); addl $0x1000,%eax loop 11b /* - * End condition: we must map up to end + * End condition: we must map up to the end + MAPPING_BEYOND_END. */ - movl $pa(_end) + PTE_IDENT_ATTR, %ebp + movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp cmpl %ebp,%eax jb 10b addl $__PAGE_OFFSET, %edi -- cgit v1.2.3 From b8a22a6273d5aed248db2695ce9bf65eb3e9fbe6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Mar 2009 12:10:07 -0700 Subject: x86-32: remove ALLOCATOR_SLOP from head_32.S Impact: cleanup ALLOCATOR_SLOP is a vestigial remain from when we used the bootmem allocator to allocate the kernel's linear memory mapping. Now we directly reserve pages from the e820 mapping, and no longer require secondary structures to keep track of allocated pages. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d383a7c0e49f..fc884b90b6d2 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -58,7 +58,6 @@ #else #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #endif -ALLOCATOR_SLOP = 4 /* Enough space to fit pagetables for the low memory linear map */ MAPPING_BEYOND_END = (PAGE_TABLE_SIZE(1 << (32 - PAGE_SHIFT)) * PAGE_SIZE) @@ -70,7 +69,7 @@ MAPPING_BEYOND_END = (PAGE_TABLE_SIZE(1 << (32 - PAGE_SHIFT)) * PAGE_SIZE) */ KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT -INIT_MAP_SIZE = (PAGE_TABLE_SIZE(KERNEL_PAGES) + ALLOCATOR_SLOP) * PAGE_SIZE_asm +INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm RESERVE_BRK(pagetables, INIT_MAP_SIZE) /* -- cgit v1.2.3 From 60ac98213914cc615c67e84bfb964aa95a080d13 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 17 Mar 2009 11:38:23 -0700 Subject: x86-32: tighten the bound on additional memory to map Impact: Tighten bound to avoid masking errors The definition of MAPPING_BEYOND_END was excessive; this has a nasty tendency to mask bugs. We have learned over time that this kind of bug hiding can cause some very strange errors. Therefore, tighten the bound to only need to map the actual kernel area. Signed-off-by: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Yinghai Lu --- arch/x86/kernel/head_32.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index fc884b90b6d2..30683883e0cd 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -60,7 +60,8 @@ #endif /* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = (PAGE_TABLE_SIZE(1 << (32 - PAGE_SHIFT)) * PAGE_SIZE) +MAPPING_BEYOND_END = \ + PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT /* * Worst-case size of the kernel mapping we need to make: -- cgit v1.2.3 From 704439ddf9f8ff1fc8c6d8abaac4bc4c95697e39 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 14 Mar 2009 23:20:47 -0700 Subject: x86/brk: put the brk reservations in their own section Impact: disambiguate real .bss variables from .brk storage Add a .brk section after the .bss section. This has no effect on the final vmlinux, but it more clearly distinguishes the space taken by actual .bss symbols, and the variable space reserved by .brk users. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/vmlinux_32.lds.S | 8 +++++--- arch/x86/kernel/vmlinux_64.lds.S | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 98424f33e077..de14973e47fd 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -189,16 +189,18 @@ SECTIONS *(.bss) . = ALIGN(4); __bss_stop = .; + } + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { . = ALIGN(PAGE_SIZE); __brk_base = . ; - . += 64 * 1024 ; /* 64k slop space */ + . += 64 * 1024 ; /* 64k alignment slop space */ *(.brk_reservation) /* areas brk users have reserved */ __brk_limit = . ; - - _end = . ; } + _end = . ; + /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 7996687663a2..c8742507b030 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -247,10 +247,12 @@ SECTIONS *(.bss.page_aligned) *(.bss) __bss_stop = .; + } + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { . = ALIGN(PAGE_SIZE); __brk_base = . ; - . += 64 * 1024; /* 64k slop space */ + . += 64 * 1024 ; /* 64k alignment slop space */ *(.brk_reservation) /* areas brk users have reserved */ __brk_limit = . ; } -- cgit v1.2.3 From 0a699af8e613a670be50245366fa18cb19ac5172 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 17 Mar 2009 14:14:31 -0700 Subject: x86-32: move _end to a dummy section Impact: build fix with CONFIG_RELOCATABLE Move _end into a dummy section, so that relocs.c will know it is a relocatable symbol. Signed-off-by: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Jeremy Fitzhardinge --- arch/x86/kernel/vmlinux_32.lds.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index de14973e47fd..62ad500d55f3 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -199,7 +199,9 @@ SECTIONS __brk_limit = . ; } - _end = . ; + .end : AT(ADDR(.end) - LOAD_OFFSET) { + _end = . ; + } /* Sections to be discarded */ /DISCARD/ : { -- cgit v1.2.3 From 9d783ba042771284fb4ee5013c3d94220755ae7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:55 -0700 Subject: x86, x2apic: enable fault handling for intr-remapping Impact: interface augmentation (not yet used) Enable fault handling flow for intr-remapping aswell. Fault handling code now shared by both dma-remapping and intr-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msidef.h | 1 + arch/x86/kernel/apic/io_apic.c | 9 +++- arch/x86/kernel/apic/probe_64.c | 9 ++++ drivers/pci/dmar.c | 102 +++++++++++++++++++++++++++++++++------- drivers/pci/intel-iommu.c | 3 +- drivers/pci/intr_remapping.c | 2 +- include/linux/dmar.h | 5 +- include/linux/intel-iommu.h | 4 +- 8 files changed, 107 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h index 6706b3006f13..4cc48af23fef 100644 --- a/arch/x86/include/asm/msidef.h +++ b/arch/x86/include/asm/msidef.h @@ -47,6 +47,7 @@ #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00) #define MSI_ADDR_IR_EXT_INT (1 << 4) #define MSI_ADDR_IR_SHV (1 << 3) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 00e6071cefc4..b18a7734d689 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3294,7 +3294,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } else #endif { - msg->address_hi = MSI_ADDR_BASE_HI; + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(dest); + else + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO | ((apic->irq_dest_mode == 0) ? @@ -3528,7 +3533,7 @@ void arch_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#ifdef CONFIG_DMAR +#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 8d7748efe6a8..8297c2b8ed20 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -68,6 +68,15 @@ void __init default_setup_apic_routing(void) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } + +#ifdef CONFIG_X86_X2APIC + /* + * Now that apic routing model is selected, configure the + * fault handling for intr remapping. + */ + if (intr_remapping_enabled) + enable_drhd_fault_handling(); +#endif } /* Same for both flat and physical. */ diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 75d34bf2db50..bb4ed985f9c7 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -511,6 +511,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) return -ENOMEM; iommu->seq_id = iommu_allocated++; + sprintf (iommu->name, "dmar%d", iommu->seq_id); iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); if (!iommu->reg) { @@ -817,7 +818,13 @@ int dmar_enable_qi(struct intel_iommu *iommu) /* iommu interrupt handling. Most stuff are MSI-like. */ -static const char *fault_reason_strings[] = +enum faulttype { + DMA_REMAP, + INTR_REMAP, + UNKNOWN, +}; + +static const char *dma_remap_fault_reasons[] = { "Software", "Present bit in root entry is clear", @@ -833,14 +840,33 @@ static const char *fault_reason_strings[] = "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", }; + +static const char *intr_remap_fault_reasons[] = +{ + "Detected reserved fields in the decoded interrupt-remapped request", + "Interrupt index exceeded the interrupt-remapping table size", + "Present field in the IRTE entry is clear", + "Error accessing interrupt-remapping table pointed by IRTA_REG", + "Detected reserved fields in the IRTE entry", + "Blocked a compatibility format interrupt request", + "Blocked an interrupt request due to source-id verification failure", +}; + #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -const char *dmar_get_fault_reason(u8 fault_reason) +const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) { - if (fault_reason > MAX_FAULT_REASON_IDX) + if (fault_reason >= 0x20 && (fault_reason <= 0x20 + + ARRAY_SIZE(intr_remap_fault_reasons))) { + *fault_type = INTR_REMAP; + return intr_remap_fault_reasons[fault_reason - 0x20]; + } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { + *fault_type = DMA_REMAP; + return dma_remap_fault_reasons[fault_reason]; + } else { + *fault_type = UNKNOWN; return "Unknown"; - else - return fault_reason_strings[fault_reason]; + } } void dmar_msi_unmask(unsigned int irq) @@ -897,16 +923,25 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u16 source_id, unsigned long long addr) { const char *reason; + int fault_type; - reason = dmar_get_fault_reason(fault_reason); + reason = dmar_get_fault_reason(fault_reason, &fault_type); - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " - "fault addr %llx \n" - "DMAR:[fault reason %02d] %s\n", - (type ? "DMA Read" : "DMA Write"), - (source_id >> 8), PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + if (fault_type == INTR_REMAP) + printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " + "fault index %llx\n" + "INTR-REMAP:[fault reason %02d] %s\n", + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else + printk(KERN_ERR + "DMAR:[%s] Request device [%02x:%02x.%d] " + "fault addr %llx \n" + "DMAR:[fault reason %02d] %s\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); return 0; } @@ -920,10 +955,13 @@ static irqreturn_t dmar_fault(int irq, void *dev_id) spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); + if (fault_status) + printk(KERN_ERR "DRHD: handling fault status reg %x\n", + fault_status); /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) - goto clear_overflow; + goto clear_rest; fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); @@ -964,11 +1002,10 @@ static irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = 0; spin_lock_irqsave(&iommu->register_lock, flag); } -clear_overflow: - /* clear primary fault overflow */ +clear_rest: + /* clear all the other faults */ fault_status = readl(iommu->reg + DMAR_FSTS_REG); - if (fault_status & DMA_FSTS_PFO) - writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; @@ -978,6 +1015,12 @@ int dmar_set_interrupt(struct intel_iommu *iommu) { int irq, ret; + /* + * Check if the fault interrupt is already initialized. + */ + if (iommu->irq) + return 0; + irq = create_irq(); if (!irq) { printk(KERN_ERR "IOMMU: no free vectors\n"); @@ -1003,3 +1046,26 @@ int dmar_set_interrupt(struct intel_iommu *iommu) printk(KERN_ERR "IOMMU: can't request irq\n"); return ret; } + +int __init enable_drhd_fault_handling(void) +{ + struct dmar_drhd_unit *drhd; + + /* + * Enable fault control interrupt. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_set_interrupt(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable fault, " + " interrupt, ret %d\n", + (unsigned long long)drhd->reg_base_addr, ret); + return -1; + } + } + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4a4ab651b709..25fc1df486bb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1799,7 +1799,7 @@ static int __init init_dmars(void) struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int i, ret, unit = 0; + int i, ret; /* * for each drhd @@ -1921,7 +1921,6 @@ static int __init init_dmars(void) if (drhd->ignored) continue; iommu = drhd->iommu; - sprintf (iommu->name, "dmar%d", unit++); iommu_flush_write_buffer(iommu); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 5ffa65fffb6a..c38e3f437a81 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -308,7 +308,7 @@ int modify_irte(int irq, struct irte *irte_modified) index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + set_64bit((unsigned long *)irte, irte_modified->low); __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f28440784cf0..c7768330c11d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -49,6 +49,7 @@ extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ extern void detect_intel_iommu(void); +extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); @@ -116,9 +117,6 @@ extern struct intel_iommu *map_ioapic_to_ir(int apic); #define intr_remapping_enabled (0) #endif -#ifdef CONFIG_DMAR -extern const char *dmar_get_fault_reason(u8 fault_reason); - /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ @@ -129,6 +127,7 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); +#ifdef CONFIG_DMAR extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; struct dmar_rmrr_unit { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d2e3cbfba14f..a9563840644b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -292,6 +292,8 @@ struct intel_iommu { spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ + unsigned int irq; + unsigned char name[13]; /* Device Name */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ @@ -299,8 +301,6 @@ struct intel_iommu { spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ - unsigned int irq; - unsigned char name[7]; /* Device Name */ struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ -- cgit v1.2.3 From 7c6d9f9785d156d0438401ef270322da3d5247db Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:59 -0700 Subject: x86, x2apic: use virtual wire A mode in disable_IO_APIC() with interrupt-remapping Impact: make kexec work with x2apic disable_IO_APIC() gets called during crashdump aswell, which configures the IO-APIC/LAPIC so that legacy interrupts can be delivered for the kexec'd kernel. In the presence of interrupt-remapping, we need to change the interrupt-remapping configuration aswell as modifying IO-APIC for virtual wire B mode. To keep things simple during the crash, use virtual wire A mode (for which we don't need to touch io-apic and interrupt-remapping tables). Signed-off-by: Suresh Siddha Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b18a7734d689..4d975d0e3588 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2040,8 +2040,13 @@ void disable_IO_APIC(void) * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. + * + * With interrupt-remapping, for now we will use virtual wire A mode, + * as virtual wire B is little complex (need to configure both + * IOAPIC RTE aswell as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1) { + if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -2061,7 +2066,10 @@ void disable_IO_APIC(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } - disconnect_bsp_APIC(ioapic_i8259.pin != -1); + /* + * Use virtual wire A mode when interrupt remapping is enabled. + */ + disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From cf6567fe40c55e9cffca7355cd34e50fb2871e4e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:00 -0700 Subject: x86, x2apic: fix clear_local_APIC() in the presence of x2apic Impact: cleanup, paranoia We were not clearing the local APIC in clear_local_APIC() in the presence of x2apic. Fix it. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 3 +++ arch/x86/include/asm/irq_remapping.h | 2 -- arch/x86/kernel/apic/apic.c | 9 ++------- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 394d177d721b..6d5b6f0900e1 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -184,6 +184,9 @@ static inline int x2apic_enabled(void) { return 0; } + +#define x2apic 0 + #endif extern int get_physical_broadcast(void); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 20e1fd588dbf..0396760fccb8 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_IRQ_REMAPPING_H #define _ASM_X86_IRQ_REMAPPING_H -extern int x2apic; - #define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) #endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 30909a258d0f..699f8cf76bbb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -809,7 +809,7 @@ void clear_local_APIC(void) u32 v; /* APIC hasn't been mapped yet */ - if (!apic_phys) + if (!x2apic && !apic_phys) return; maxlvt = lapic_get_maxlvt(); @@ -1523,12 +1523,10 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { -#ifdef CONFIG_X86_X2APIC if (x2apic) { boot_cpu_physical_apicid = read_apic_id(); return; } -#endif /* * If no local APIC can be found then set up a fake all @@ -1972,12 +1970,9 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_X2APIC if (x2apic) enable_x2apic(); - else -#endif - { + else { /* * Make sure the APICBASE points to the right address * -- cgit v1.2.3 From 0280f7c416c652a2fd95d166f52b199ae61122c0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:01 -0700 Subject: x86, x2apic: cleanup the IO-APIC level migration with interrupt-remapping Impact: simplification In the current code, for level triggered migration, we need to modify the io-apic RTE with the update vector information, along with modifying interrupt remapping table entry(IRTE) with vector and destination. This is to ensure that remote IRR bit inthe IOAPIC RTE gets cleared when the cpu does EOI. With this patch, for level triggered, we eliminate the io-apic RTE modification (with the updated vector information), by using a virtual vector (io-apic pin number). Real vector that is used for interrupting cpu will be coming from the interrupt-remapping table entry. Trigger mode in the IRTE will always be edge, and the actual level or edge trigger will be setup in the IO-APIC RTE. So a level triggered interrupt will appear as an edge to the local apic cpu but still as level to the IO-APIC. With this change, level irq migration can be done by simply modifying the interrupt-remapping table entry with out changing the io-apic RTE. And as the interrupt appears as edge at the cpu, in addition to do the local apic EOI, we need to do IO-APIC directed EOI to clear the remote IRR bit in the IO-APIC RTE. This simplies the irq migration in the presence of interrupt-remapping. Idea-by: Rajesh Sankaran Signed-off-by: Suresh Siddha Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 2 +- arch/x86/kernel/apic/io_apic.c | 156 +++++++++++++++++------------------------ 2 files changed, 66 insertions(+), 92 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 59cb4a1317b7..ffcd08f96e47 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -172,7 +172,7 @@ extern void probe_nr_irqs_gsi(void); extern int setup_ioapic_entry(int apic, int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int trigger, - int polarity, int vector); + int polarity, int vector, int pin); extern void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e); #else /* !CONFIG_X86_IO_APIC */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4d975d0e3588..e074eac5bd35 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -389,6 +389,8 @@ struct io_apic { unsigned int index; unsigned int unused[3]; unsigned int data; + unsigned int unused2[11]; + unsigned int eoi; }; static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) @@ -397,6 +399,12 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } +static inline void io_apic_eoi(unsigned int apic, unsigned int vector) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(vector, &io_apic->eoi); +} + static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -1478,7 +1486,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t int setup_ioapic_entry(int apic_id, int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int trigger, - int polarity, int vector) + int polarity, int vector, int pin) { /* * add it to the IO-APIC irq-routing table: @@ -1504,7 +1512,14 @@ int setup_ioapic_entry(int apic_id, int irq, irte.present = 1; irte.dst_mode = apic->irq_dest_mode; - irte.trigger_mode = trigger; + /* + * Trigger mode in the IRTE will always be edge, and the + * actual level or edge trigger will be setup in the IO-APIC + * RTE. This will help simplify level triggered irq migration. + * For more details, see the comments above explainig IO-APIC + * irq migration in the presence of interrupt-remapping. + */ + irte.trigger_mode = 0; irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1515,18 +1530,23 @@ int setup_ioapic_entry(int apic_id, int irq, ir_entry->zero = 0; ir_entry->format = 1; ir_entry->index = (index & 0x7fff); + /* + * IO-APIC RTE will be configured with virtual vector. + * irq handler will do the explicit EOI to the io-apic. + */ + ir_entry->vector = pin; } else #endif { entry->delivery_mode = apic->irq_delivery_mode; entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; + entry->vector = vector; } entry->mask = 0; /* enable IRQ */ entry->trigger = trigger; entry->polarity = polarity; - entry->vector = vector; /* Mask level triggered irqs. * Use IRQ_DELAYED_DISABLE for edge triggered irqs. @@ -1561,7 +1581,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, - dest, trigger, polarity, cfg->vector)) { + dest, trigger, polarity, cfg->vector, pin)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic_id].apicid, pin); __clear_irq_vector(irq, cfg); @@ -2311,37 +2331,24 @@ static int ioapic_retrigger_irq(unsigned int irq) #ifdef CONFIG_SMP #ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); /* * Migrate the IO-APIC irq in the presence of intr-remapping. * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. + * For both level and edge triggered, irq migration is a simple atomic + * update(of vector and cpu destination) of IRTE and flush the hardware cache. * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. + * For level triggered, we eliminate the io-apic RTE modification (with the + * updated vector information), by using a virtual vector (io-apic pin number). + * Real vector that is used for interrupting cpu will be coming from + * the interrupt-remapping table entry. */ static void migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; - int modify_ioapic_rte; unsigned int dest; - unsigned long flags; unsigned int irq; if (!cpumask_intersects(mask, cpu_online_mask)) @@ -2359,13 +2366,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - modify_ioapic_rte = desc->status & IRQ_LEVEL; - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -2380,73 +2380,12 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) cpumask_copy(desc->affinity, mask); } -static int migrate_irq_remapped_level_desc(struct irq_desc *desc) -{ - int ret = -1; - struct irq_cfg *cfg = desc->chip_data; - - mask_IO_APIC_irq_desc(desc); - - if (io_apic_level_ack_pending(cfg)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, desc->pending_mask); - - ret = 0; - desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(desc->pending_mask); - -unmask: - unmask_IO_APIC_irq_desc(desc); - - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, desc->pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - /* * Migrates the IRQ destination in the process context. */ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - if (desc->status & IRQ_LEVEL) { - desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(desc->pending_mask, mask); - migrate_irq_remapped_level_desc(desc); - return; - } - migrate_ioapic_irq_desc(desc, mask); } static void set_ir_ioapic_affinity_irq(unsigned int irq, @@ -2537,9 +2476,44 @@ static inline void irq_complete_move(struct irq_desc **descp) {} #endif #ifdef CONFIG_INTR_REMAP +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + + entry = cfg->irq_2_pin; + for (;;) { + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_eoi(apic, pin); + entry = entry->next; + } +} + +static void +eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ack_x2apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); ack_x2APIC_irq(); + eoi_ioapic_irq(desc); } static void ack_x2apic_edge(unsigned int irq) -- cgit v1.2.3 From 29b61be65a33c95564fa82e7e8d60d97adb68ea8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:02 -0700 Subject: x86, x2apic: cleanup ifdef CONFIG_INTR_REMAP in io_apic code Impact: cleanup Clean up #ifdefs and replace them with helper functions. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 44 +++++++++------------------------------- arch/x86/kernel/apic/probe_64.c | 2 -- include/linux/dmar.h | 45 ++++++++++++++++++++++++++++++++++------- 3 files changed, 48 insertions(+), 43 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e074eac5bd35..cf27795c641c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -554,16 +554,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq apic = entry->apic; pin = entry->pin; -#ifdef CONFIG_INTR_REMAP /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. */ if (!irq_remapped(irq)) io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -1419,9 +1415,8 @@ void __setup_vector_irq(int cpu) } static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip; -#endif +static struct irq_chip msi_ir_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -1460,7 +1455,6 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t else desc->status &= ~IRQ_LEVEL; -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { desc->status |= IRQ_MOVE_PCNTXT; if (trigger) @@ -1472,7 +1466,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); return; } -#endif + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, @@ -1493,7 +1487,6 @@ int setup_ioapic_entry(int apic_id, int irq, */ memset(entry,0,sizeof(*entry)); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); struct irte irte; @@ -1535,9 +1528,7 @@ int setup_ioapic_entry(int apic_id, int irq, * irq handler will do the explicit EOI to the io-apic. */ ir_entry->vector = pin; - } else -#endif - { + } else { entry->delivery_mode = apic->irq_delivery_mode; entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; @@ -1662,10 +1653,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, { struct IO_APIC_route_entry entry; -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) return; -#endif memset(&entry, 0, sizeof(entry)); @@ -2395,6 +2384,11 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, set_ir_ioapic_affinity_irq_desc(desc, mask); } +#else +static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, + const struct cpumask *mask) +{ +} #endif asmlinkage void smp_irq_move_cleanup_interrupt(void) @@ -2883,10 +2877,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); -#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2922,10 +2914,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) @@ -3219,9 +3209,7 @@ void destroy_irq(unsigned int irq) if (desc) desc->chip_data = cfg; -#ifdef CONFIG_INTR_REMAP free_irte(irq); -#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); @@ -3247,7 +3235,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { struct irte irte; int ir_index; @@ -3273,9 +3260,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms MSI_ADDR_IR_SHV | MSI_ADDR_IR_INDEX1(ir_index) | MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { + } else { if (x2apic_enabled()) msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); @@ -3392,6 +3377,7 @@ static struct irq_chip msi_ir_chip = { #endif .retrigger = ioapic_retrigger_irq, }; +#endif /* * Map the PCI dev to the corresponding remapping hardware unit @@ -3419,7 +3405,6 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } return index; } -#endif static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { @@ -3433,7 +3418,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { struct irq_desc *desc = irq_to_desc(irq); /* @@ -3442,7 +3426,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) desc->status |= IRQ_MOVE_PCNTXT; set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); } else -#endif set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); @@ -3456,11 +3439,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret, sub_handle; struct msi_desc *msidesc; unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP struct intel_iommu *iommu = 0; int index = 0; -#endif irq_want = nr_irqs_gsi; sub_handle = 0; @@ -3469,7 +3449,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (irq == 0) return -1; irq_want = irq + 1; -#ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -3497,7 +3476,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) set_irte_irq(irq, iommu, index, sub_handle); } no_ir: -#endif ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; @@ -4032,11 +4010,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) set_ir_ioapic_affinity_irq_desc(desc, mask); else -#endif set_ioapic_affinity_irq_desc(desc, mask); } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 8297c2b8ed20..1783652bb0e5 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -69,14 +69,12 @@ void __init default_setup_apic_routing(void) printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } -#ifdef CONFIG_X86_X2APIC /* * Now that apic routing model is selected, configure the * fault handling for intr remapping. */ if (intr_remapping_enabled) enable_drhd_fault_handling(); -#endif } /* Same for both flat and physical. */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a035aec14a9..2f3427468956 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,9 +26,8 @@ #include #include -#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; - +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -52,7 +51,6 @@ extern int dmar_dev_scope_init(void); extern void detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); - extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else @@ -65,12 +63,12 @@ static inline int dmar_table_init(void) { return -ENODEV; } +static inline int enable_drhd_fault_handling(void) +{ + return -1; +} #endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ -#ifdef CONFIG_INTR_REMAP -extern int intr_remapping_enabled; -extern int enable_intr_remapping(int); - struct irte { union { struct { @@ -99,6 +97,10 @@ struct irte { __u64 high; }; }; +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + extern int get_irte(int irq, struct irte *entry); extern int modify_irte(int irq, struct irte *irte_modified); extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); @@ -113,6 +115,35 @@ extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else +static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + return -1; +} +static inline int modify_irte(int irq, struct irte *irte_modified) +{ + return -1; +} +static inline int free_irte(int irq) +{ + return -1; +} +static inline int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + return -1; +} +static inline int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle) +{ + return -1; +} +static inline struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + return NULL; +} +static inline struct intel_iommu *map_ioapic_to_ir(int apic) +{ + return NULL; +} #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) -- cgit v1.2.3 From 05c3dc2c4b60387769cbe73174347de4cf85f0c9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:03 -0700 Subject: x86, ioapic: Fix non atomic allocation with interrupts disabled Impact: fix possible race save_mask_IO_APIC_setup() was using non atomic memory allocation while getting called with interrupts disabled. Fix this by splitting this into two different function. Allocation part save_IO_APIC_setup() now happens before disabling interrupts. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 3 ++- arch/x86/kernel/apic/apic.c | 11 ++++++----- arch/x86/kernel/apic/io_apic.c | 34 +++++++++++++++++++++++----------- 3 files changed, 31 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index ffcd08f96e47..373cc2bbcad2 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -162,7 +162,8 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); #ifdef CONFIG_X86_64 -extern int save_mask_IO_APIC_setup(void); +extern int save_IO_APIC_setup(void); +extern void mask_IO_APIC_setup(void); extern void restore_IO_APIC_setup(void); extern void reinit_intr_remapped_IO_APIC(int); #endif diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 699f8cf76bbb..85eb8e100818 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1334,15 +1334,16 @@ void __init enable_IR_x2apic(void) return; } - local_irq_save(flags); - mask_8259A(); - - ret = save_mask_IO_APIC_setup(); + ret = save_IO_APIC_setup(); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); goto end; } + local_irq_save(flags); + mask_IO_APIC_setup(); + mask_8259A(); + ret = enable_intr_remapping(1); if (ret && x2apic_preenabled) { @@ -1367,10 +1368,10 @@ end_restore: else reinit_intr_remapped_IO_APIC(x2apic_preenabled); -end: unmask_8259A(); local_irq_restore(flags); +end: if (!ret) { if (!x2apic_preenabled) pr_info("Enabled x2apic and interrupt-remapping\n"); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cf27795c641c..ff1759a1128e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -853,9 +853,9 @@ __setup("pirq=", ioapic_pirq_setup); static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; /* - * Saves and masks all the unmasked IO-APIC RTE's + * Saves all the IO-APIC RTE's */ -int save_mask_IO_APIC_setup(void) +int save_IO_APIC_setup(void) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -880,16 +880,9 @@ int save_mask_IO_APIC_setup(void) } for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + early_ioapic_entries[apic][pin] = ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } return 0; @@ -902,6 +895,25 @@ nomem: return -ENOMEM; } +void mask_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + if (!early_ioapic_entries[apic]) + break; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin]; + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + } +} + void restore_IO_APIC_setup(void) { int apic, pin; -- cgit v1.2.3 From 68a8ca593fac82e336a792226272455901fa83df Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:04 -0700 Subject: x86: fix broken irq migration logic while cleaning up multiple vectors Impact: fix spurious IRQs During irq migration, we send a low priority interrupt to the previous irq destination. This happens in non interrupt-remapping case after interrupt starts arriving at new destination and in interrupt-remapping case after modifying and flushing the interrupt-remapping table entry caches. This low priority irq cleanup handler can cleanup multiple vectors, as multiple irq's can be migrated at almost the same time. While there will be multiple invocations of irq cleanup handler (one cleanup IPI for each irq migration), first invocation of the cleanup handler can potentially cleanup more than one vector (as the first invocation can see the requests for more than vector cleanup). When we cleanup multiple vectors during the first invocation of the smp_irq_move_cleanup_interrupt(), other vectors that are to be cleanedup can still be pending in the local cpu's IRR (as smp_irq_move_cleanup_interrupt() runs with interrupts disabled). When we are ready to unhook a vector corresponding to an irq, check if that vector is registered in the local cpu's IRR. If so skip that cleanup and do a self IPI with the cleanup vector, so that we give a chance to service the pending vector interrupt and then cleanup that vector allocation once we execute the lowest priority handler. This fixes spurious interrupts seen when migrating multiple vectors at the same time. [ This is apparently possible even on conventional xapic, although to the best of our knowledge it has never been seen. The stable maintainers may wish to consider this one for -stable. ] Signed-off-by: Suresh Siddha Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin Cc: stable@kernel.org --- arch/x86/kernel/apic/io_apic.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ff1759a1128e..42cdc78427a2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2414,6 +2414,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irq; + unsigned int irr; struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; @@ -2433,6 +2434,18 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + /* + * Check if the vector that needs to be cleanedup is + * registered at the cpu's IRR. If so, then this is not + * the best time to clean it up. Lets clean it up in the + * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR + * to myself. + */ + if (irr & (1 << (vector % 32))) { + apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); + goto unlock; + } __get_cpu_var(vector_irq)[vector] = -1; cfg->move_cleanup_count--; unlock: -- cgit v1.2.3 From a6b6a14e0c60561f2902b078bd28d0e61defad70 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 18 Mar 2009 10:40:25 +1030 Subject: x86: use smp_call_function_single() in arch/x86/kernel/cpu/mcheck/mce_amd_64.c Attempting to rid us of the problematic work_on_cpu(). Just use smp_call_function_single() here. Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell LKML-Reference: <20090318042217.EF3F1DDF39@ozlabs.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 40 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index c5a32f92d07e..7d01be868870 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -92,7 +92,8 @@ struct thresh_restart { }; /* must be called with correct cpu affinity */ -static long threshold_restart_bank(void *_tr) +/* Called via smp_call_function_single() */ +static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; @@ -119,7 +120,6 @@ static long threshold_restart_bank(void *_tr) mci_misc_hi |= MASK_COUNT_EN_HI; wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - return 0; } /* cpu init entry point, called from mce.c with preempt off */ @@ -279,7 +279,7 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, tr.b = b; tr.reset = 0; tr.old_limit = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); return end - buf; } @@ -301,23 +301,32 @@ static ssize_t store_threshold_limit(struct threshold_block *b, tr.b = b; tr.reset = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); return end - buf; } -static long local_error_count(void *_b) +struct threshold_block_cross_cpu { + struct threshold_block *tb; + long retval; +}; + +static void local_error_count_handler(void *_tbcc) { - struct threshold_block *b = _b; + struct threshold_block_cross_cpu *tbcc = _tbcc; + struct threshold_block *b = tbcc->tb; u32 low, high; rdmsr(b->address, low, high); - return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); + tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); } static ssize_t show_error_count(struct threshold_block *b, char *buf) { - return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); + struct threshold_block_cross_cpu tbcc = { .tb = b, }; + + smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); + return sprintf(buf, "%lx\n", tbcc.retval); } static ssize_t store_error_count(struct threshold_block *b, @@ -325,7 +334,7 @@ static ssize_t store_error_count(struct threshold_block *b, { struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); return 1; } @@ -394,7 +403,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) return 0; - if (rdmsr_safe(address, &low, &high)) + if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) return 0; if (!(high & MASK_VALID_HI)) { @@ -458,12 +467,11 @@ out_free: return err; } -static __cpuinit long local_allocate_threshold_blocks(void *_bank) +static __cpuinit long +local_allocate_threshold_blocks(int cpu, unsigned int bank) { - unsigned int *bank = _bank; - - return allocate_threshold_blocks(smp_processor_id(), *bank, 0, - MSR_IA32_MC0_MISC + *bank * 4); + return allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); } /* symlinks sibling shared banks to first core. first core owns dir/files. */ @@ -526,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); + err = local_allocate_threshold_blocks(cpu, bank); if (err) goto out_free; -- cgit v1.2.3 From ce4e240c279a31096f74afa6584a62d64a1ba8c8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 17 Mar 2009 10:16:54 -0800 Subject: x86: add x2apic_wrmsr_fence() to x2apic flush tlb paths Impact: optimize APIC IPI related barriers Uncached MMIO accesses for xapic are inherently serializing and hence we don't need explicit barriers for xapic IPI paths. x2apic MSR writes/reads don't have serializing semantics and hence need a serializing instruction or mfence, to make all the previous memory stores globally visisble before the x2apic msr write for IPI. Add x2apic_wrmsr_fence() in flush tlb path to x2apic specific paths. Signed-off-by: Suresh Siddha Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Jens Axboe Cc: Linus Torvalds Cc: "Paul E. McKenney" Cc: Rusty Russell Cc: Steven Rostedt Cc: "steiner@sgi.com" Cc: Nick Piggin LKML-Reference: <1237313814.27006.203.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 10 ++++++++++ arch/x86/kernel/apic/x2apic_cluster.c | 6 ++++++ arch/x86/kernel/apic/x2apic_phys.c | 6 ++++++ arch/x86/mm/tlb.c | 5 ----- 4 files changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6d5b6f0900e1..00f5962d82d0 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); #ifdef CONFIG_X86_X2APIC +/* + * Make previous memory operations globally visible before + * sending the IPI through x2apic wrmsr. We need a serializing instruction or + * mfence for this. + */ +static inline void x2apic_wrmsr_fence(void) +{ + asm volatile("mfence" : : : "memory"); +} + static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8fb87b6dd633..4a903e2f0d17 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest( @@ -73,6 +75,8 @@ static void unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { if (query_cpu == this_cpu) @@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_online_cpu(query_cpu) { if (query_cpu == this_cpu) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 23625b9f98b2..a284359627e7 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), @@ -73,6 +75,8 @@ static void unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_cpu(query_cpu, mask) { if (query_cpu != this_cpu) @@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector) unsigned long query_cpu; unsigned long flags; + x2apic_wrmsr_fence(); + local_irq_save(flags); for_each_online_cpu(query_cpu) { if (query_cpu == this_cpu) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a654d59e4483..821e97017e95 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -186,11 +186,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id())); - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); /* * We have to send the IPI only to * CPUs affected. -- cgit v1.2.3 From 2c74d66624ddbda8101d54d1e184cf9229b378bc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 18 Mar 2009 08:22:30 +1030 Subject: x86, uv: fix cpumask iterator in uv_bau_init() Impact: fix boot crash on UV systems Commit 76ba0ecda0de9accea9a91cb6dbde46782110e1c "cpumask: use cpumask_var_t in uv_flush_tlb_others" used cur_cpu as an iterator; it was supposed to be zero for the code below it. Reported-by: Cliff Wickman Original-From: Cliff Wickman Signed-off-by: Rusty Russell Acked-by: Mike Travis Cc: steiner@sgi.com Cc: LKML-Reference: <200903180822.31196.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d038b9c45cf8..79c073247284 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -750,7 +750,7 @@ static int __init uv_bau_init(void) int node; int nblades; int last_blade; - int cur_cpu = 0; + int cur_cpu; if (!is_uv_system()) return 0; @@ -760,6 +760,7 @@ static int __init uv_bau_init(void) uv_mmask = (1UL << uv_hub_info->n_val) - 1; nblades = 0; last_blade = -1; + cur_cpu = 0; for_each_online_node(node) { blade = uv_node_to_blade_id(node); if (blade == last_blade) -- cgit v1.2.3 From 4e16c888754468a58d4829c2eba2c6aa3a065e2b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 18 Mar 2009 17:36:55 +0530 Subject: x86: cpu/mttr/cleanup.c fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/cpu/mtrr/cleanup.c:197: warning: format ‘%d’ expects type ‘int’, but argument 2 has type ‘long unsigned int’ Signed-off-by: Jaswinder Singh Rajput Cc: Yinghai Lu LKML-Reference: <1237378015.13488.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index f4f89fbc228f..ce0fe4b5c04f 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -160,8 +160,9 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) { - unsigned long i, base, size; + unsigned long base, size; mtrr_type type; + int i; for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; -- cgit v1.2.3 From cde5edbda8ba7d600154ce4171125a48e4d2a21b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 18 Mar 2009 17:37:45 +0530 Subject: x86: kprobes.c fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/kprobes.c:196: warning: passing argument 1 of ‘search_exception_tables’ makes integer from pointer without a cast Signed-off-by: Jaswinder Singh Rajput Cc: Linus Torvalds LKML-Reference:<49BED952.2050809@redhat.com> LKML-Reference: <1237378065.13488.2.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 4558dd3918cf..55b94614e348 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -193,7 +193,7 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes) kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables(opcodes)) + if (search_exception_tables((unsigned long)opcodes)) return 0; /* Page fault may occur on this address. */ retry: -- cgit v1.2.3 From a6830278568a8bb9758aac152db15187741e0113 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 18 Mar 2009 20:42:28 +0530 Subject: x86: mpparse: clean up code by introducing a few helper functions Impact: cleanup Refactor the MP-table parsing code via the introduction of the following helper functions: skip_entry() smp_reserve_bootmem() check_irq_src() check_slot() To simplify the code flow and to reduce the size of the following oversized functions: smp_read_mpc(), smp_scan_config(). There should be no impact to functionality. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 261 +++++++++++++++++++++------------------------- 1 file changed, 120 insertions(+), 141 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 47673e02ae58..58ddf6259afb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -109,9 +109,6 @@ static void __init MP_bus_info(struct mpc_bus *m) } else printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } -#endif - -#ifdef CONFIG_X86_IO_APIC static int bad_ioapic(unsigned long address) { @@ -224,8 +221,12 @@ static void __init MP_intsrc_info(struct mpc_intsrc *m) if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } +#else /* CONFIG_X86_IO_APIC */ +static inline void __init MP_bus_info(struct mpc_bus *m) {} +static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} +static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {} +#endif /* CONFIG_X86_IO_APIC */ -#endif static void __init MP_lintsrc_info(struct mpc_lintsrc *m) { @@ -275,6 +276,12 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) return 1; } +static void skip_entry(unsigned char **ptr, int *count, int size) +{ + *ptr += size; + *count += size; +} + static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) { char str[16]; @@ -310,55 +317,27 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) while (count < mpc->length) { switch (*mpt) { case MP_PROCESSOR: - { - struct mpc_cpu *m = (struct mpc_cpu *)mpt; - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } + /* ACPI may have already provided this data */ + if (!acpi_lapic) + MP_processor_info((struct mpc_cpu *)&mpt); + skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); + break; case MP_BUS: - { - struct mpc_bus *m = (struct mpc_bus *)mpt; -#ifdef CONFIG_X86_IO_APIC - MP_bus_info(m); -#endif - mpt += sizeof(*m); - count += sizeof(*m); - break; - } + MP_bus_info((struct mpc_bus *)&mpt); + skip_entry(&mpt, &count, sizeof(struct mpc_bus)); + break; case MP_IOAPIC: - { -#ifdef CONFIG_X86_IO_APIC - struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; - MP_ioapic_info(m); -#endif - mpt += sizeof(struct mpc_ioapic); - count += sizeof(struct mpc_ioapic); - break; - } + MP_ioapic_info((struct mpc_ioapic *)&mpt); + skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); + break; case MP_INTSRC: - { -#ifdef CONFIG_X86_IO_APIC - struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; - - MP_intsrc_info(m); -#endif - mpt += sizeof(struct mpc_intsrc); - count += sizeof(struct mpc_intsrc); - break; - } + MP_intsrc_info((struct mpc_intsrc *)&mpt); + skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); + break; case MP_LINTSRC: - { - struct mpc_lintsrc *m = - (struct mpc_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } + MP_lintsrc_info((struct mpc_lintsrc *)&mpt); + skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); + break; default: /* wrong mptable */ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); @@ -689,6 +668,31 @@ void __init get_smp_config(void) __get_smp_config(0); } +static void smp_reserve_bootmem(struct mpf_intel *mpf) +{ + unsigned long size = get_mpc_size(mpf->physptr); +#ifdef CONFIG_X86_32 + /* + * We cannot access to MPC table to compute table size yet, + * as only few megabytes from the bottom is mapped now. + * PC-9800's MPC table places on the very last of physical + * memory; so that simply reserving PAGE_SIZE from mpf->physptr + * yields BUG() in reserve_bootmem. + * also need to make sure physptr is below than max_low_pfn + * we don't need reserve the area above max_low_pfn + */ + unsigned long end = max_low_pfn * PAGE_SIZE; + + if (mpf->physptr < end) { + if (mpf->physptr + size > end) + size = end - mpf->physptr; + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); + } +#else + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); +#endif +} + static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { @@ -717,35 +721,9 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, if (!reserve) return 1; reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), - BOOTMEM_DEFAULT); - if (mpf->physptr) { - unsigned long size = get_mpc_size(mpf->physptr); -#ifdef CONFIG_X86_32 - /* - * We cannot access to MPC table to compute - * table size yet, as only few megabytes from - * the bottom is mapped now. - * PC-9800's MPC table places on the very last - * of physical memory; so that simply reserving - * PAGE_SIZE from mpf->physptr yields BUG() - * in reserve_bootmem. - * also need to make sure physptr is below than - * max_low_pfn - * we don't need reserve the area above max_low_pfn - */ - unsigned long end = max_low_pfn * PAGE_SIZE; - - if (mpf->physptr < end) { - if (mpf->physptr + size > end) - size = end - mpf->physptr; - reserve_bootmem_generic(mpf->physptr, size, - BOOTMEM_DEFAULT); - } -#else - reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); -#endif - } + if (mpf->physptr) + smp_reserve_bootmem(mpf); return 1; } @@ -848,7 +826,57 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) #define SPARE_SLOT_NUM 20 static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; -#endif + +static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) +{ + int i; + + apic_printk(APIC_VERBOSE, "OLD "); + print_MP_intsrc_info(m); + + i = get_MP_intsrc_index(m); + if (i > 0) { + assign_to_mpc_intsrc(&mp_irqs[i], m); + apic_printk(APIC_VERBOSE, "NEW "); + print_mp_irq_info(&mp_irqs[i]); + return; + } + if (!i) { + /* legacy, do nothing */ + return; + } + if (*nr_m_spare < SPARE_SLOT_NUM) { + /* + * not found (-1), or duplicated (-2) are invalid entries, + * we need to use the slot later + */ + m_spare[*nr_m_spare] = m; + *nr_m_spare += 1; + } +} +#else /* CONFIG_X86_IO_APIC */ +static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} +#endif /* CONFIG_X86_IO_APIC */ + +static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, + int count) +{ + if (!mpc_new_phys) { + pr_info("No spare slots, try to append...take your risk, " + "new mpc_length %x\n", count); + } else { + if (count <= mpc_new_length) + pr_info("No spare slots, try to append..., " + "new mpc_length %x\n", count); + else { + pr_err("mpc_new_length %lx is too small\n", + mpc_new_length); + return -1; + } + } + + return 0; +} static int __init replace_intsrc_all(struct mpc_table *mpc, unsigned long mpc_new_phys, @@ -856,71 +884,30 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, { #ifdef CONFIG_X86_IO_APIC int i; - int nr_m_spare = 0; #endif - int count = sizeof(*mpc); + int nr_m_spare = 0; unsigned char *mpt = ((unsigned char *)mpc) + count; printk(KERN_INFO "mpc_length %x\n", mpc->length); while (count < mpc->length) { switch (*mpt) { case MP_PROCESSOR: - { - struct mpc_cpu *m = (struct mpc_cpu *)mpt; - mpt += sizeof(*m); - count += sizeof(*m); - break; - } + skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); + break; case MP_BUS: - { - struct mpc_bus *m = (struct mpc_bus *)mpt; - mpt += sizeof(*m); - count += sizeof(*m); - break; - } + skip_entry(&mpt, &count, sizeof(struct mpc_bus)); + break; case MP_IOAPIC: - { - mpt += sizeof(struct mpc_ioapic); - count += sizeof(struct mpc_ioapic); - break; - } + skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); + break; case MP_INTSRC: - { -#ifdef CONFIG_X86_IO_APIC - struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; - - apic_printk(APIC_VERBOSE, "OLD "); - print_MP_intsrc_info(m); - i = get_MP_intsrc_index(m); - if (i > 0) { - assign_to_mpc_intsrc(&mp_irqs[i], m); - apic_printk(APIC_VERBOSE, "NEW "); - print_mp_irq_info(&mp_irqs[i]); - } else if (!i) { - /* legacy, do nothing */ - } else if (nr_m_spare < SPARE_SLOT_NUM) { - /* - * not found (-1), or duplicated (-2) - * are invalid entries, - * we need to use the slot later - */ - m_spare[nr_m_spare] = m; - nr_m_spare++; - } -#endif - mpt += sizeof(struct mpc_intsrc); - count += sizeof(struct mpc_intsrc); - break; - } + check_irq_src((struct mpc_intsrc *)&mpt, &nr_m_spare); + skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); + break; case MP_LINTSRC: - { - struct mpc_lintsrc *m = - (struct mpc_lintsrc *)mpt; - mpt += sizeof(*m); - count += sizeof(*m); - break; - } + skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); + break; default: /* wrong mptable */ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); @@ -950,16 +937,8 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, } else { struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; count += sizeof(struct mpc_intsrc); - if (!mpc_new_phys) { - printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); - } else { - if (count <= mpc_new_length) - printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); - else { - printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); - goto out; - } - } + if (!check_slot(mpc_new_phys, mpc_new_length, count)) + goto out; assign_to_mpc_intsrc(&mp_irqs[i], m); mpc->length = count; mpt += sizeof(struct mpc_intsrc); -- cgit v1.2.3 From c58603e81b3ed4f1c7352e091fe43fd0bd8d06cc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 19 Mar 2009 08:50:35 +0100 Subject: x86: mpparse: clean up code by introducing a few helper functions, fix Impact: fix boot crash This fixes commit a6830278568a8bb9758aac152db15187741e0113. Signed-off-by: Jaswinder Singh Rajput Cc: Yinghai Lu LKML-Reference: <1237403503.22438.21.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 58ddf6259afb..290cb57f4697 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -319,23 +319,23 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) case MP_PROCESSOR: /* ACPI may have already provided this data */ if (!acpi_lapic) - MP_processor_info((struct mpc_cpu *)&mpt); + MP_processor_info((struct mpc_cpu *)mpt); skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); break; case MP_BUS: - MP_bus_info((struct mpc_bus *)&mpt); + MP_bus_info((struct mpc_bus *)mpt); skip_entry(&mpt, &count, sizeof(struct mpc_bus)); break; case MP_IOAPIC: - MP_ioapic_info((struct mpc_ioapic *)&mpt); + MP_ioapic_info((struct mpc_ioapic *)mpt); skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); break; case MP_INTSRC: - MP_intsrc_info((struct mpc_intsrc *)&mpt); + MP_intsrc_info((struct mpc_intsrc *)mpt); skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); break; case MP_LINTSRC: - MP_lintsrc_info((struct mpc_lintsrc *)&mpt); + MP_lintsrc_info((struct mpc_lintsrc *)mpt); skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); break; default: @@ -902,7 +902,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); break; case MP_INTSRC: - check_irq_src((struct mpc_intsrc *)&mpt, &nr_m_spare); + check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare); skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); break; case MP_LINTSRC: -- cgit v1.2.3 From 71ff49d71bb5cfcd2689b54cb433c0e6990a1d86 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Mar 2009 13:03:33 -0700 Subject: x86: with the last user gone, remove set_pte_present Impact: cleanup set_pte_present() is no longer used, directly or indirectly, so remove it. Signed-off-by: Jeremy Fitzhardinge Cc: Xen-devel Cc: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Marcelo Tosatti Cc: Avi Kivity LKML-Reference: <1237406613-2929-2-git-send-email-jeremy@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 15 --------------- arch/x86/include/asm/pgtable-2level.h | 7 ------- arch/x86/include/asm/pgtable-3level.h | 17 ----------------- arch/x86/include/asm/pgtable.h | 2 -- arch/x86/kernel/kvm.c | 7 ------- arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/vmi_32.c | 6 ------ arch/x86/xen/mmu.c | 1 - 8 files changed, 56 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 31fe83b10a4f..7727aa8b7dda 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -317,8 +317,6 @@ struct pv_mmu_ops { #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pmd_clear)(pmd_t *pmdp); @@ -1365,13 +1363,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) pte.pte, pte.pte >> 32); } -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - /* 5 arg words */ - pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -1388,12 +1379,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) set_pte(ptep, pte); } -static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte(ptep, pte); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index c1774ac9da7a..2334982b339e 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -26,13 +26,6 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) native_set_pte(ptep, pte); } -static inline void native_set_pte_present(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pte_t pte) -{ - native_set_pte(ptep, pte); -} - static inline void native_pmd_clear(pmd_t *pmdp) { native_set_pmd(pmdp, __pmd(0)); diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 3f13cdf61156..177b0165ea01 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -31,23 +31,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) ptep->pte_low = pte.pte_low; } -/* - * Since this is only called on user PTEs, and the page fault handler - * must handle the already racy situation of simultaneous page faults, - * we are justified in merely clearing the PTE present bit, followed - * by a set. The ordering here is important. - */ -static inline void native_set_pte_present(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pte_t pte) -{ - ptep->pte_low = 0; - smp_wmb(); - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; -} - static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d0812e155f1d..29d96d168bc0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -31,8 +31,6 @@ extern struct list_head pgd_list; #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) -#define set_pte_present(mm, addr, ptep, pte) \ - native_set_pte_present(mm, addr, ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 478bca986eca..33019ddb56b4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -138,12 +138,6 @@ static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) kvm_mmu_write(ptep, pte_val(pte)); } -static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - kvm_mmu_write(ptep, pte_val(pte)); -} - static void kvm_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -220,7 +214,6 @@ static void paravirt_ops_setup(void) #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; - pv_mmu_ops.set_pte_present = kvm_set_pte_present; pv_mmu_ops.pte_clear = kvm_pte_clear; pv_mmu_ops.pmd_clear = kvm_pmd_clear; #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 63dd358d8ee1..8e45f4464880 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -470,7 +470,6 @@ struct pv_mmu_ops pv_mmu_ops = { #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE .set_pte_atomic = native_set_pte_atomic, - .set_pte_present = native_set_pte_present, .pte_clear = native_pte_clear, .pmd_clear = native_pmd_clear, #endif diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 2cc4a90e2cb3..95deb9f2211e 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -395,11 +395,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) vmi_ops.update_pte(ptep, VMI_PAGE_PT); } -static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) -{ - vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); -} - static void vmi_set_pud(pud_t *pudp, pud_t pudval) { /* Um, eww */ @@ -750,7 +745,6 @@ static inline int __init activate_vmi(void) pv_mmu_ops.set_pmd = vmi_set_pmd; #ifdef CONFIG_X86_PAE pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; - pv_mmu_ops.set_pte_present = vmi_set_pte_present; pv_mmu_ops.set_pud = vmi_set_pud; pv_mmu_ops.pte_clear = vmi_pte_clear; pv_mmu_ops.pmd_clear = vmi_pmd_clear; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 72f6a76dbfb9..db3802fb7b84 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1870,7 +1870,6 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { #ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, .pte_clear = xen_pte_clear, .pmd_clear = xen_pmd_clear, #endif /* CONFIG_X86_PAE */ -- cgit v1.2.3 From 14fc9fbc700dc95b4f46ebd588169324fe6deff8 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 19 Mar 2009 10:56:29 -0700 Subject: x86: signal: check signal stack overflow properly Impact: cleanup Check alternate signal stack overflow with proper stack pointer. The stack pointer of the next signal frame is different if that task has i387 state. On x86_64, redzone would be included. No need to check SA_ONSTACK if we're already using alternate signal stack. Signed-off-by: Hiroshi Shimamoto Cc: Roland McGrath LKML-Reference: <49C2874D.3080002@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d2cc6428c587..dfcc74ab0ab6 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -211,31 +211,27 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, { /* Default to using normal stack */ unsigned long sp = regs->sp; + int onsigstack = on_sig_stack(sp); #ifdef CONFIG_X86_64 /* redzone */ sp -= 128; #endif /* CONFIG_X86_64 */ - /* - * If we are on the alternate signal stack and would overflow it, don't. - * Return an always-bogus address instead so we will die with SIGSEGV. - */ - if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) - return (void __user *) -1L; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(sp) == 0) - sp = current->sas_ss_sp + current->sas_ss_size; - } else { + if (!onsigstack) { + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } else { #ifdef CONFIG_X86_32 - /* This is the legacy signal stack switching. */ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) - sp = (unsigned long) ka->sa.sa_restorer; + /* This is the legacy signal stack switching. */ + if ((regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) + sp = (unsigned long) ka->sa.sa_restorer; #endif /* CONFIG_X86_32 */ + } } if (used_math()) { @@ -244,12 +240,22 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = round_down(sp, 64); #endif /* CONFIG_X86_64 */ *fpstate = (void __user *)sp; - - if (save_i387_xstate(*fpstate) < 0) - return (void __user *)-1L; } - return (void __user *)align_sigframe(sp - frame_size); + sp = align_sigframe(sp - frame_size); + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (onsigstack && !likely(on_sig_stack(sp))) + return (void __user *)-1L; + + /* save i387 state */ + if (used_math() && save_i387_xstate(*fpstate) < 0) + return (void __user *)-1L; + + return (void __user *)sp; } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 04c93ce4991fce731dab346d03964504339347db Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Mar 2009 21:02:55 +0100 Subject: x86: fix IO APIC resource allocation error message Impact: fix incorrect error message - IO APIC resource allocation error message contains one too many "be". - Print the error message iff there are IO APICs in the system. I've seen this error message for some time on my x86-32 laptop... Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Alan Bartlett LKML-Reference: <200903202100.30789.bzolnier@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 42cdc78427a2..d882c03604ee 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4130,9 +4130,12 @@ static int __init ioapic_insert_resources(void) struct resource *r = ioapic_resources; if (!r) { - printk(KERN_ERR - "IO APIC resources could be not be allocated.\n"); - return -1; + if (nr_ioapics > 0) { + printk(KERN_ERR + "IO APIC resources couldn't be allocated.\n"); + return -1; + } + return 0; } for (i = 0; i < nr_ioapics; i++) { -- cgit v1.2.3 From 5a5737eac224f01e264477954d92ed6e69047b7a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 13:28:39 +0530 Subject: x86: mpparse.c introduce smp_dump_mptable helper function smp_read_mpc() and replace_intsrc_all() can use same smp_dump_mptable() Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/mpparse.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 290cb57f4697..4216d2653662 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -282,6 +282,14 @@ static void skip_entry(unsigned char **ptr, int *count, int size) *count += size; } +static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) +{ + printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" + "type %x\n", *mpt); + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, + 1, mpc, mpc->length, 1); +} + static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) { char str[16]; @@ -340,10 +348,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) break; default: /* wrong mptable */ - printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); - printk(KERN_ERR "type %x\n", *mpt); - print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, - 1, mpc, mpc->length, 1); + smp_dump_mptable(mpc, mpt); count = mpc->length; break; } @@ -910,10 +915,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, break; default: /* wrong mptable */ - printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); - printk(KERN_ERR "type %x\n", *mpt); - print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, - 1, mpc, mpc->length, 1); + smp_dump_mptable(mpc, mpt); goto out; } } -- cgit v1.2.3 From 0b3ba0c3ccc7ced2a06fed405e80c8e1c77a3ee7 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 13:43:20 +0530 Subject: x86: mpparse.c introduce check_physptr helper function To reduce the size of the oversized function __get_smp_config() There should be no impact to functionality. Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/mpparse.c | 94 +++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 44 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 4216d2653662..dce99dca6cf8 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -555,6 +555,55 @@ static unsigned long __init get_mpc_size(unsigned long physptr) return size; } +static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) +{ + struct mpc_table *mpc; + unsigned long size; + + size = get_mpc_size(mpf->physptr); + mpc = early_ioremap(mpf->physptr, size); + /* + * Read the physical hardware table. Anything here will + * override the defaults. + */ + if (!smp_read_mpc(mpc, early)) { +#ifdef CONFIG_X86_LOCAL_APIC + smp_found_config = 0; +#endif + printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" + "... disabling SMP support. (tell your hw vendor)\n"); + early_iounmap(mpc, size); + return -1; + } + early_iounmap(mpc, size); + + if (early) + return -1; + +#ifdef CONFIG_X86_IO_APIC + /* + * If there are no explicit MP IRQ entries, then we are + * broken. We set up most of the low 16 IO-APIC pins to + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { + struct mpc_bus bus; + + printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " + "using default mptable. (tell your hw vendor)\n"); + + bus.type = MP_BUS; + bus.busid = 0; + memcpy(bus.bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); + } +#endif + + return 0; +} + /* * Scan the memory blocks for an SMP configuration block. */ @@ -608,51 +657,8 @@ static void __init __get_smp_config(unsigned int early) construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { - struct mpc_table *mpc; - unsigned long size; - - size = get_mpc_size(mpf->physptr); - mpc = early_ioremap(mpf->physptr, size); - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc(mpc, early)) { -#ifdef CONFIG_X86_LOCAL_APIC - smp_found_config = 0; -#endif - printk(KERN_ERR - "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. " - "(tell your hw vendor)\n"); - early_iounmap(mpc, size); - return; - } - early_iounmap(mpc, size); - - if (early) + if (check_physptr(mpf, early)) return; -#ifdef CONFIG_X86_IO_APIC - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_bus bus; - - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " - "using default mptable. " - "(tell your hw vendor)\n"); - - bus.type = MP_BUS; - bus.busid = 0; - memcpy(bus.bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } -#endif } else BUG(); -- cgit v1.2.3 From 271eb5c588e5d0a41eca6e118b6927af3f0f04b9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 16:55:24 +0530 Subject: x86: topology.c cleanup Impact: cleanup Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/topology.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 0fcc95a354f7..7e4515957a1c 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -25,10 +25,10 @@ * * Send feedback to */ -#include -#include #include #include +#include +#include #include static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); @@ -47,6 +47,7 @@ int __ref arch_register_cpu(int num) */ if (num) per_cpu(cpu_devices, num).cpu.hotpluggable = 1; + return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } EXPORT_SYMBOL(arch_register_cpu); @@ -56,12 +57,13 @@ void arch_unregister_cpu(int num) unregister_cpu(&per_cpu(cpu_devices, num).cpu); } EXPORT_SYMBOL(arch_unregister_cpu); -#else +#else /* CONFIG_HOTPLUG_CPU */ + static int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } -#endif /*CONFIG_HOTPLUG_CPU*/ +#endif /* CONFIG_HOTPLUG_CPU */ static int __init topology_init(void) { @@ -70,11 +72,11 @@ static int __init topology_init(void) #ifdef CONFIG_NUMA for_each_online_node(i) register_one_node(i); -#endif /* CONFIG_NUMA */ +#endif for_each_present_cpu(i) arch_register_cpu(i); + return 0; } - subsys_initcall(topology_init); -- cgit v1.2.3 From 390cd85c8ae4dc54ffba460a0e6575889170f56e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 16:55:45 +0530 Subject: x86: kdebugfs.c cleanup Impact: cleanup Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/kdebugfs.c | 82 ++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 46 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index ff7d3b0124f1..e444357375ce 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -8,11 +8,11 @@ */ #include #include -#include +#include #include +#include #include #include -#include #include @@ -26,9 +26,8 @@ struct setup_data_node { u32 len; }; -static ssize_t -setup_data_read(struct file *file, char __user *user_buf, size_t count, - loff_t *ppos) +static ssize_t setup_data_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) { struct setup_data_node *node = file->private_data; unsigned long remain; @@ -39,20 +38,21 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count, if (pos < 0) return -EINVAL; + if (pos >= node->len) return 0; if (count > node->len - pos) count = node->len - pos; + pa = node->paddr + sizeof(struct setup_data) + pos; pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); if (PageHighMem(pg)) { p = ioremap_cache(pa, count); if (!p) return -ENXIO; - } else { + } else p = __va(pa); - } remain = copy_to_user(user_buf, p, count); @@ -70,12 +70,13 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count, static int setup_data_open(struct inode *inode, struct file *file) { file->private_data = inode->i_private; + return 0; } static const struct file_operations fops_setup_data = { - .read = setup_data_read, - .open = setup_data_open, + .read = setup_data_read, + .open = setup_data_open, }; static int __init @@ -84,57 +85,50 @@ create_setup_data_node(struct dentry *parent, int no, { struct dentry *d, *type, *data; char buf[16]; - int error; sprintf(buf, "%d", no); d = debugfs_create_dir(buf, parent); - if (!d) { - error = -ENOMEM; - goto err_return; - } + if (!d) + return -ENOMEM; + type = debugfs_create_x32("type", S_IRUGO, d, &node->type); - if (!type) { - error = -ENOMEM; + if (!type) goto err_dir; - } + data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); - if (!data) { - error = -ENOMEM; + if (!data) goto err_type; - } + return 0; err_type: debugfs_remove(type); err_dir: debugfs_remove(d); -err_return: - return error; + return -ENOMEM; } static int __init create_setup_data_nodes(struct dentry *parent) { struct setup_data_node *node; struct setup_data *data; - int error, no = 0; + int error = -ENOMEM; struct dentry *d; struct page *pg; u64 pa_data; + int no = 0; d = debugfs_create_dir("setup_data", parent); - if (!d) { - error = -ENOMEM; - goto err_return; - } + if (!d) + return -ENOMEM; pa_data = boot_params.hdr.setup_data; while (pa_data) { node = kmalloc(sizeof(*node), GFP_KERNEL); - if (!node) { - error = -ENOMEM; + if (!node) goto err_dir; - } + pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); if (PageHighMem(pg)) { data = ioremap_cache(pa_data, sizeof(*data)); @@ -143,9 +137,8 @@ static int __init create_setup_data_nodes(struct dentry *parent) error = -ENXIO; goto err_dir; } - } else { + } else data = __va(pa_data); - } node->paddr = pa_data; node->type = data->type; @@ -159,11 +152,11 @@ static int __init create_setup_data_nodes(struct dentry *parent) goto err_dir; no++; } + return 0; err_dir: debugfs_remove(d); -err_return: return error; } @@ -175,28 +168,26 @@ static struct debugfs_blob_wrapper boot_params_blob = { static int __init boot_params_kdebugfs_init(void) { struct dentry *dbp, *version, *data; - int error; + int error = -ENOMEM; dbp = debugfs_create_dir("boot_params", NULL); - if (!dbp) { - error = -ENOMEM; - goto err_return; - } + if (!dbp) + return -ENOMEM; + version = debugfs_create_x16("version", S_IRUGO, dbp, &boot_params.hdr.version); - if (!version) { - error = -ENOMEM; + if (!version) goto err_dir; - } + data = debugfs_create_blob("data", S_IRUGO, dbp, &boot_params_blob); - if (!data) { - error = -ENOMEM; + if (!data) goto err_version; - } + error = create_setup_data_nodes(dbp); if (error) goto err_data; + return 0; err_data: @@ -205,10 +196,9 @@ err_version: debugfs_remove(version); err_dir: debugfs_remove(dbp); -err_return: return error; } -#endif +#endif /* CONFIG_DEBUG_BOOT_PARAMS */ static int __init arch_kdebugfs_init(void) { -- cgit v1.2.3 From c8344bc218ce7ebc728337839698566a79edfe5a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 16:56:10 +0530 Subject: x86: i8253 cleanup Impact: cleanup - fix various style problems - fix header file issues Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/i8253.c | 68 ++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 10f92fb532f3..3475440baa54 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -3,17 +3,17 @@ * */ #include -#include #include +#include #include #include -#include +#include +#include +#include -#include -#include #include -#include #include +#include DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -40,7 +40,7 @@ static void init_pit_timer(enum clock_event_mode mode, { spin_lock(&i8253_lock); - switch(mode) { + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: /* binary, mode 2, LSB/MSB, ch 0 */ outb_pit(0x34, PIT_MODE); @@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - * !using_apic_timer decisions in do_timer_interrupt_hook() */ -static struct clock_event_device pit_clockevent = { +static struct clock_event_device pit_ce = { .name = "pit", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = init_pit_timer, @@ -114,15 +114,13 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. */ - pit_clockevent.cpumask = cpumask_of(smp_processor_id()); - pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, - pit_clockevent.shift); - pit_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFF, &pit_clockevent); - pit_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &pit_clockevent); - clockevents_register_device(&pit_clockevent); - global_clock_event = &pit_clockevent; + pit_ce.cpumask = cpumask_of(smp_processor_id()); + pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift); + pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce); + pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce); + + clockevents_register_device(&pit_ce); + global_clock_event = &pit_ce; } #ifndef CONFIG_X86_64 @@ -133,11 +131,11 @@ void __init setup_pit_timer(void) */ static cycle_t pit_read(void) { + static int old_count; + static u32 old_jifs; unsigned long flags; int count; u32 jifs; - static int old_count; - static u32 old_jifs; spin_lock_irqsave(&i8253_lock, flags); /* @@ -179,9 +177,9 @@ static cycle_t pit_read(void) * Previous attempts to handle these cases intelligently were * buggy, so we just do the simple thing now. */ - if (count > old_count && jifs == old_jifs) { + if (count > old_count && jifs == old_jifs) count = old_count; - } + old_count = count; old_jifs = jifs; @@ -192,13 +190,13 @@ static cycle_t pit_read(void) return (cycle_t)(jifs * LATCH) + count; } -static struct clocksource clocksource_pit = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = 0, - .shift = 20, +static struct clocksource pit_cs = { + .name = "pit", + .rating = 110, + .read = pit_read, + .mask = CLOCKSOURCE_MASK(32), + .mult = 0, + .shift = 20, }; static void pit_disable_clocksource(void) @@ -206,9 +204,9 @@ static void pit_disable_clocksource(void) /* * Use mult to check whether it is registered or not */ - if (clocksource_pit.mult) { - clocksource_unregister(&clocksource_pit); - clocksource_pit.mult = 0; + if (pit_cs.mult) { + clocksource_unregister(&pit_cs); + pit_cs.mult = 0; } } @@ -222,13 +220,13 @@ static int __init init_pit_clocksource(void) * - when local APIC timer is active (PIT is switched off) */ if (num_possible_cpus() > 1 || is_hpet_enabled() || - pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) + pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) return 0; - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, - clocksource_pit.shift); - return clocksource_register(&clocksource_pit); + pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift); + + return clocksource_register(&pit_cs); } arch_initcall(init_pit_clocksource); -#endif +#endif /* !CONFIG_X86_64 */ -- cgit v1.2.3 From 8383d821e7481f7cde9c17b61deb1b4af43b2cd9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 16:56:37 +0530 Subject: x86: rtc.c cleanup Impact: cleanup - fix various style problems - fix header file issues Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/rtc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index dd6f2b71561b..5d465b207e72 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -1,14 +1,14 @@ /* * RTC related functions */ +#include +#include #include #include -#include -#include #include -#include #include +#include #ifdef CONFIG_X86_32 /* @@ -16,9 +16,9 @@ * register we are working with. It is required for NMI access to the * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. */ -volatile unsigned long cmos_lock = 0; +volatile unsigned long cmos_lock; EXPORT_SYMBOL(cmos_lock); -#endif +#endif /* CONFIG_X86_32 */ /* For two digit years assume time is always after that */ #define CMOS_YEARS_OFFS 2000 @@ -38,9 +38,9 @@ EXPORT_SYMBOL(rtc_lock); */ int mach_set_rtc_mmss(unsigned long nowtime) { - int retval = 0; int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + int retval = 0; /* tell the clock it's being set */ save_control = CMOS_READ(RTC_CONTROL); @@ -72,8 +72,8 @@ int mach_set_rtc_mmss(unsigned long nowtime) real_seconds = bin2bcd(real_seconds); real_minutes = bin2bcd(real_minutes); } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); + CMOS_WRITE(real_seconds, RTC_SECONDS); + CMOS_WRITE(real_minutes, RTC_MINUTES); } else { printk(KERN_WARNING "set_rtc_mmss: can't update from %d to %d\n", @@ -151,6 +151,7 @@ unsigned char rtc_cmos_read(unsigned char addr) outb(addr, RTC_PORT(0)); val = inb(RTC_PORT(1)); lock_cmos_suffix(addr); + return val; } EXPORT_SYMBOL(rtc_cmos_read); @@ -166,8 +167,8 @@ EXPORT_SYMBOL(rtc_cmos_write); static int set_rtc_mmss(unsigned long nowtime) { - int retval; unsigned long flags; + int retval; spin_lock_irqsave(&rtc_lock, flags); retval = set_wallclock(nowtime); @@ -242,6 +243,7 @@ static __init int add_rtc_cmos(void) platform_device_register(&rtc_device); dev_info(&rtc_device.dev, "registered platform RTC device (no PNP device found)\n"); + return 0; } device_initcall(add_rtc_cmos); -- cgit v1.2.3 From d53a44446076a7dd68a4fb7f549fb6aeda9bfc2c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 16:57:04 +0530 Subject: x86: io_delay.c cleanup Impact: cleanup - fix header file issues Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/io_delay.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 720d2607aacb..a979b5bd2fc0 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -7,10 +7,10 @@ */ #include #include -#include #include +#include #include -#include +#include int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; @@ -47,8 +47,7 @@ EXPORT_SYMBOL(native_io_delay); static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) { if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { - printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", - id->ident); + pr_notice("%s: using 0xed I/O delay port\n", id->ident); io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; } @@ -64,40 +63,40 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { .callback = dmi_io_delay_0xed_port, .ident = "Compaq Presario V6000", .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30B7") + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B7") } }, { .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion dv9000z", .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30B9") + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B9") } }, { .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion dv6000", .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30B8") + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B8") } }, { .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion tx1000", .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30BF") + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30BF") } }, { .callback = dmi_io_delay_0xed_port, .ident = "Presario F700", .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), - DMI_MATCH(DMI_BOARD_NAME, "30D3") + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30D3") } }, { } -- cgit v1.2.3 From 1894e36754d682cc049b2b1c3825da8e585967d5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 21 Mar 2009 17:01:25 +0530 Subject: x86: pci-nommu.c cleanup Impact: cleanup Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/pci-nommu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c70ab5a5d4c8..8b02a3936d42 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -1,14 +1,14 @@ /* Fallback functions when the main IOMMU code is not compiled in. This code is roughly equivalent to i386. */ -#include -#include -#include -#include #include #include +#include +#include +#include +#include -#include #include +#include #include static int @@ -79,11 +79,11 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, } struct dma_mapping_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, - .map_single = nommu_map_single, - .map_sg = nommu_map_sg, - .is_phys = 1, + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = nommu_free_coherent, + .map_single = nommu_map_single, + .map_sg = nommu_map_sg, + .is_phys = 1, }; void __init no_iommu_init(void) -- cgit v1.2.3 From 1cc185211a9cb913f6adbe3354e5c256f456ebd2 Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Sun, 22 Mar 2009 19:11:09 +0200 Subject: x86: Fix a couple of sparse warnings in arch/x86/kernel/apic/io_apic.c Impact: cleanup This patch fixes the following sparse warnings: arch/x86/kernel/apic/io_apic.c:3602:17: warning: symbol 'hpet_msi_type' was not declared. Should it be static? arch/x86/kernel/apic/io_apic.c:3467:30: warning: Using plain integer as NULL pointer Signed-off-by: Dmitri Vorobiev LKML-Reference: <1237741871-5827-2-git-send-email-dmitri.vorobiev@movial.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 42cdc78427a2..ea97e5efa907 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3464,7 +3464,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret, sub_handle; struct msi_desc *msidesc; unsigned int irq_want; - struct intel_iommu *iommu = 0; + struct intel_iommu *iommu = NULL; int index = 0; irq_want = nr_irqs_gsi; @@ -3599,7 +3599,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) #endif /* CONFIG_SMP */ -struct irq_chip hpet_msi_type = { +static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .unmask = hpet_msi_unmask, .mask = hpet_msi_mask, -- cgit v1.2.3 From f2362e6f1b9c5c168e5b4159afb4853ba467965e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 23 Mar 2009 02:06:51 +0530 Subject: x86: cpu/cpu.h cleanup Impact: cleanup - Fix various style issues Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/cpu/cpu.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 9469ecb5aeb8..6de9a908e400 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -3,25 +3,25 @@ #define ARCH_X86_CPU_H struct cpu_model_info { - int vendor; - int family; - const char *model_names[16]; + int vendor; + int family; + const char *model_names[16]; }; /* attempt to consolidate cpu attributes */ struct cpu_dev { - const char * c_vendor; + const char *c_vendor; /* some have two possibilities for cpuid string */ - const char * c_ident[2]; + const char *c_ident[2]; struct cpu_model_info c_models[4]; - void (*c_early_init)(struct cpuinfo_x86 *c); - void (*c_init)(struct cpuinfo_x86 * c); - void (*c_identify)(struct cpuinfo_x86 * c); - unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); - int c_x86_vendor; + void (*c_early_init)(struct cpuinfo_x86 *); + void (*c_init)(struct cpuinfo_x86 *); + void (*c_identify)(struct cpuinfo_x86 *); + unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); + int c_x86_vendor; }; #define cpu_dev_register(cpu_devX) \ -- cgit v1.2.3 From ce2d8bfd44c01fc9b22d64617b7e520e99095f33 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 23 Mar 2009 02:08:00 +0530 Subject: x86: irq.c use same path for show_interrupts Impact: cleanup SMP and !SMP will use same path for show_interrupts Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/irq.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b8ac3b6cf776..5e7c3e6f8f27 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -133,23 +133,15 @@ int show_interrupts(struct seq_file *p, void *v) return 0; spin_lock_irqsave(&desc->lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); -#endif action = desc->action; if (!action && !any_count) goto out; seq_printf(p, "%*d: ", prec, i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#endif seq_printf(p, " %8s", desc->chip->name); seq_printf(p, "-%-8s", desc->name); -- cgit v1.2.3 From 474e56b82c06cdbed468aea957805e0eb19d3510 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 23 Mar 2009 02:08:34 +0530 Subject: x86: irq.c keep CONFIG_X86_LOCAL_APIC interrupts together Impact: cleanup keep CONFIG_X86_LOCAL_APIC interrupts together to avoid extra ifdef Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/irq.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 5e7c3e6f8f27..3aaf7b9e3a8b 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -58,6 +58,11 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_printf(p, " Local timer interrupts\n"); + + seq_printf(p, "%*s: ", prec, "SPU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); #endif if (generic_interrupt_extension) { seq_printf(p, "PLT: "); @@ -90,12 +95,6 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); # endif -#endif -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "%*s: ", prec, "SPU"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) @@ -166,6 +165,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; + sum += irq_stats(cpu)->irq_spurious_count; #endif if (generic_interrupt_extension) sum += irq_stats(cpu)->generic_irqs; @@ -179,9 +179,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) # ifdef CONFIG_X86_64 sum += irq_stats(cpu)->irq_threshold_count; #endif -#endif -#ifdef CONFIG_X86_LOCAL_APIC - sum += irq_stats(cpu)->irq_spurious_count; #endif return sum; } -- cgit v1.2.3 From a1e38ca5ce1789de1bbd723e1e09de962e47ce18 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 23 Mar 2009 02:11:25 +0530 Subject: x86: apic/io_apic.c define msi_ir_chip and ir_ioapic_chip all the time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit move out msi_ir_chip and ir_ioapic_chip from CONFIG_INTR_REMAP shadow Fix: arch/x86/kernel/apic/io_apic.c:1431: warning: ‘msi_ir_chip’ defined but not used Signed-off-by: Jaswinder Singh Rajput --- arch/x86/kernel/apic/io_apic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 42cdc78427a2..d36e3d8be0f1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1428,7 +1428,6 @@ void __setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; static struct irq_chip ir_ioapic_chip; -static struct irq_chip msi_ir_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -2663,20 +2662,20 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; -#ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip __read_mostly = { .name = "IR-IO-APIC", .startup = startup_ioapic_irq, .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, +#ifdef CONFIG_INTR_REMAP .ack = ack_x2apic_edge, .eoi = ack_x2apic_level, #ifdef CONFIG_SMP .set_affinity = set_ir_ioapic_affinity_irq, +#endif #endif .retrigger = ioapic_retrigger_irq, }; -#endif static inline void init_IO_APIC_traps(void) { @@ -3391,18 +3390,18 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -#ifdef CONFIG_INTR_REMAP static struct irq_chip msi_ir_chip = { .name = "IR-PCI-MSI", .unmask = unmask_msi_irq, .mask = mask_msi_irq, +#ifdef CONFIG_INTR_REMAP .ack = ack_x2apic_edge, #ifdef CONFIG_SMP .set_affinity = ir_set_msi_irq_affinity, +#endif #endif .retrigger = ioapic_retrigger_irq, }; -#endif /* * Map the PCI dev to the corresponding remapping hardware unit -- cgit v1.2.3 From ba639039d68cd978f4fa900a6533fe930609ed35 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 23 Mar 2009 02:13:01 +0530 Subject: x86: e820 fix various signedness issues in setup.c and e820.c Impact: cleanup This fixed various signedness issues in setup.c and e820.c: arch/x86/kernel/setup.c:455:53: warning: incorrect type in argument 3 (different signedness) arch/x86/kernel/setup.c:455:53: expected int *pnr_map arch/x86/kernel/setup.c:455:53: got unsigned int extern [toplevel] * arch/x86/kernel/setup.c:639:53: warning: incorrect type in argument 3 (different signedness) arch/x86/kernel/setup.c:639:53: expected int *pnr_map arch/x86/kernel/setup.c:639:53: got unsigned int extern [toplevel] * arch/x86/kernel/setup.c:820:54: warning: incorrect type in argument 3 (different signedness) arch/x86/kernel/setup.c:820:54: expected int *pnr_map arch/x86/kernel/setup.c:820:54: got unsigned int extern [toplevel] * arch/x86/kernel/e820.c:670:53: warning: incorrect type in argument 3 (different signedness) arch/x86/kernel/e820.c:670:53: expected int *pnr_map arch/x86/kernel/e820.c:670:53: got unsigned int [toplevel] * Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/e820.h | 2 +- arch/x86/kernel/e820.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 00d41ce4c844..7ecba4d85089 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -72,7 +72,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void e820_add_region(u64 start, u64 size, int type); extern void e820_print_map(char *who); extern int -sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map); extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index fb638d9ce6d2..ef2c3563357d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -233,7 +233,7 @@ void __init e820_print_map(char *who) */ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, - int *pnr_map) + u32 *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -552,7 +552,7 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, void __init update_e820(void) { - int nr_map; + u32 nr_map; nr_map = e820.nr_map; if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) @@ -563,7 +563,7 @@ void __init update_e820(void) } static void __init update_e820_saved(void) { - int nr_map; + u32 nr_map; nr_map = e820_saved.nr_map; if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) @@ -1303,7 +1303,7 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - int nr = e820.nr_map; + u32 nr = e820.nr_map; if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) early_panic("Invalid user supplied memory map"); @@ -1386,7 +1386,7 @@ void __init e820_reserve_resources_late(void) char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; - int new_nr; + u32 new_nr; /* * Try to copy the BIOS-supplied E820-map. * -- cgit v1.2.3 From fa74c9073370e57fa28e02aff13f4d7b1806505c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Mar 2009 13:23:16 -0700 Subject: x86: fix set_extra_move_desc calling Impact: fix bug with irq-descriptor moving when logical flat Rusty observed: > The effect of setting desc->affinity (ie. from userspace via sysfs) has varied > over time. In 2.6.27, the 32-bit code anded the value with cpu_online_map, > and both 32 and 64-bit did that anding whenever a cpu was unplugged. > > 2.6.29 consolidated this into one routine (and fixed hotplug) but introduced > another variation: anding the affinity with cfg->domain. Is this right, or > should we just set it to what the user said? Or as now, indicate that we're > restricting it. Eric pointed out that desc->affinity should be what the user requested, if it is at all possible to honor the user space request. This bug got introduced by commit 22f65d31b "x86: Update io_apic.c to use new cpumask API". Fix it by moving the masking to before the descriptor moving ... Reported-by: Rusty Russell Reported-by: Eric W. Biederman LKML-Reference: <49C94134.4000408@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 86827d85488a..1ed6c0600cde 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -592,8 +592,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(desc->affinity, cfg->domain, mask); + /* check that before desc->addinity get updated */ set_extra_move_desc(desc, mask); + cpumask_and(desc->affinity, cfg->domain, mask); return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } -- cgit v1.2.3 From f56e5034121c4911a155ba907076ab920754626d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Mar 2009 14:16:30 -0700 Subject: x86: use default_cpu_mask_to_apicid for 64bit Impact: cleanup Use online_mask directly on 64bit too. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49C94DAE.9070300@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 20 ++++++++++---------- arch/x86/kernel/apic/apic_flat_64.c | 18 ++---------------- 2 files changed, 12 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 00f5962d82d0..130a9e2b4586 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -489,10 +489,19 @@ static inline int default_apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +extern int default_apicid_to_node(int logical_apicid); + +#endif + static inline unsigned int default_cpu_mask_to_apicid(const struct cpumask *cpumask) { - return cpumask_bits(cpumask)[0]; + return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; } static inline unsigned int @@ -506,15 +515,6 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -extern int default_apicid_to_node(int logical_apicid); - -#endif - static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f933822dba18..0014714ea97b 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -159,20 +159,6 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; -} - -static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; - unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; - - return mask1 & mask2; -} - static int flat_phys_pkg_id(int initial_apic_id, int index_msb) { return hard_smp_processor_id() >> index_msb; @@ -213,8 +199,8 @@ struct apic apic_flat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, -- cgit v1.2.3 From e06b1b56f9bfcc91e1f175fe8d8bf3e35dafa080 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 24 Mar 2009 14:17:19 -0700 Subject: x86: Correct behaviour of irq affinity Impact: get correct smp_affinity as user requested The effect of setting desc->affinity (ie. from userspace via sysfs) has varied over time. In 2.6.27, the 32-bit code anded the value with cpu_online_map, and both 32 and 64-bit did that anding whenever a cpu was unplugged. 2.6.29 consolidated this into one routine (and fixed hotplug) but introduced another variation: anding the affinity with cfg->domain. We should just set it to what the user said - if possible. (cpu_mask_to_apicid_and already takes cpu_online_mask into account) Signed-off-by: Yinghai Lu Acked-by: "Eric W. Biederman" Cc: Andrew Morton LKML-Reference: <49C94DDF.2010703@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1ed6c0600cde..d990408ca06f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -594,9 +594,10 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) /* check that before desc->addinity get updated */ set_extra_move_desc(desc, mask); - cpumask_and(desc->affinity, cfg->domain, mask); - return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); + cpumask_copy(desc->affinity, mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); } static void -- cgit v1.2.3 From 70511134f61bd6e5eed19f767381f9fb3e762d49 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 23 Mar 2009 23:14:29 -0700 Subject: Revert "x86: don't compile vsmp_64 for 32bit" Partial revert of commit 129d8bc828e011bda0b7110a097bf3a0167f966e titled 'x86: don't compile vsmp_64 for 32bit' Commit reverted to compile vsmp_64.c if CONFIG_X86_64 is defined, since is_vsmp_box() needs to indicate that TSCs are not synchronized, and hence, not a valid time source, even when CONFIG_X86_VSMP is not defined. Signed-off-by: Ravikiran Thirumalai Cc: Yinghai Lu Cc: Andrew Morton Cc: shai@scalex86.org LKML-Reference: <20090324061429.GH7278@localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/setup.h | 2 +- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/vsmp_64.c | 12 +++++++++++- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 130a9e2b4586..df8a300dfe6c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -75,7 +75,7 @@ static inline void default_inquire_remote_apic(int apicid) #define setup_secondary_clock setup_secondary_APIC_clock #endif -#ifdef CONFIG_X86_VSMP +#ifdef CONFIG_X86_64 extern int is_vsmp_box(void); #else static inline int is_vsmp_box(void) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index fbf0521eeed8..bdc2ada05ae0 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -64,7 +64,7 @@ extern void x86_quirk_time_init(void); #include /* Interrupt control for vSMPowered x86_64 systems */ -#ifdef CONFIG_X86_VSMP +#ifdef CONFIG_X86_64 void vsmp_init(void); #else static inline void vsmp_init(void) { } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 339ce35648e6..6e9c1f320acf 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -70,7 +70,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o -obj-$(CONFIG_X86_VSMP) += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o @@ -120,4 +119,5 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-y += vsmp_64.o endif diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 74de562812cc..a1d804bcd483 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -22,7 +22,7 @@ #include #include -#ifdef CONFIG_PARAVIRT +#if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' @@ -114,6 +114,7 @@ static void __init set_vsmp_pv_ops(void) } #endif +#ifdef CONFIG_PCI static int is_vsmp = -1; static void __init detect_vsmp_box(void) @@ -139,6 +140,15 @@ int is_vsmp_box(void) } } +#else +static void __init detect_vsmp_box(void) +{ +} +int is_vsmp_box(void) +{ + return 0; +} +#endif void __init vsmp_init(void) { detect_vsmp_box(); -- cgit v1.2.3