From 6a84f57241e1fb9fb6772256f538d1073359a32d Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Thu, 21 Jan 2016 14:02:39 -0800 Subject: raid6/algos.c : bug fix : Add the missing definitions to the pq.h file Adding these pr_info and pr_err definitions so as to allow code to be compiled successfully for testing in userspace, since the printk has been replaced by pr_info and pr_err in algos.c Absence of these definitions result in the compilation errors such as ' undefined reference to `pr_info' ' ' undefined reference to `pr_err' ' Cc: NeilBrown Cc: Anton Blanchard Cc: Fenghua Yu Signed-off-by: Gayatri Kammela Signed-off-by: Shaohua Li --- include/linux/raid/pq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index a7a06d1dcf9c..a0118d5929a9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -152,6 +152,8 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, # define jiffies raid6_jiffies() # define printk printf +# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__) +# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__) # define GFP_KERNEL 0 # define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \ PROT_READ|PROT_WRITE, \ -- cgit v1.2.3 From e93ad19d05648397ef3bcb838d26aec06c245dc0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Jan 2016 12:18:41 -0500 Subject: cpuset: make mm migration asynchronous If "cpuset.memory_migrate" is set, when a process is moved from one cpuset to another with a different memory node mask, pages in used by the process are migrated to the new set of nodes. This was performed synchronously in the ->attach() callback, which is synchronized against process management. Recently, the synchronization was changed from per-process rwsem to global percpu rwsem for simplicity and optimization. Combined with the synchronous mm migration, this led to deadlocks because mm migration could schedule a work item which may in turn try to create a new worker blocking on the process management lock held from cgroup process migration path. This heavy an operation shouldn't be performed synchronously from that deep inside cgroup migration in the first place. This patch punts the actual migration to an ordered workqueue and updates cgroup process migration and cpuset config update paths to flush the workqueue after all locks are released. This way, the operations still seem synchronous to userland without entangling mm migration with process management synchronization. CPU hotplug can also invoke mm migration but there's no reason for it to wait for mm migrations and thus doesn't synchronize against their completions. Signed-off-by: Tejun Heo Reported-and-tested-by: Christian Borntraeger Cc: stable@vger.kernel.org # v4.4+ --- include/linux/cpuset.h | 6 +++++ kernel/cgroup.c | 2 ++ kernel/cpuset.c | 71 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 57 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 85a868ccb493..fea160ee5803 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,6 +137,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } +extern void cpuset_post_attach_flush(void); + #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -243,6 +245,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } +static inline void cpuset_post_attach_flush(void) +{ +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c03a640ef6da..88abd4d076d8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -58,6 +58,7 @@ #include #include #include +#include #include /* @@ -2739,6 +2740,7 @@ out_unlock_rcu: out_unlock_threadgroup: percpu_up_write(&cgroup_threadgroup_rwsem); cgroup_kn_unlock(of->kn); + cpuset_post_attach_flush(); return ret ?: nbytes; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3e945fcd8179..41989ab4db57 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -287,6 +287,8 @@ static struct cpuset top_cpuset = { static DEFINE_MUTEX(cpuset_mutex); static DEFINE_SPINLOCK(callback_lock); +static struct workqueue_struct *cpuset_migrate_mm_wq; + /* * CPU / memory hotplug is handled asynchronously. */ @@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, } /* - * cpuset_migrate_mm - * - * Migrate memory region from one set of nodes to another. - * - * Temporarilly set tasks mems_allowed to target nodes of migration, - * so that the migration code can allocate pages on these nodes. - * - * While the mm_struct we are migrating is typically from some - * other task, the task_struct mems_allowed that we are hacking - * is for our current task, which must allocate new pages for that - * migrating memory region. + * Migrate memory region from one set of nodes to another. This is + * performed asynchronously as it can be called from process migration path + * holding locks involved in process management. All mm migrations are + * performed in the queued order and can be waited for by flushing + * cpuset_migrate_mm_wq. */ +struct cpuset_migrate_mm_work { + struct work_struct work; + struct mm_struct *mm; + nodemask_t from; + nodemask_t to; +}; + +static void cpuset_migrate_mm_workfn(struct work_struct *work) +{ + struct cpuset_migrate_mm_work *mwork = + container_of(work, struct cpuset_migrate_mm_work, work); + + /* on a wq worker, no need to worry about %current's mems_allowed */ + do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); + mmput(mwork->mm); + kfree(mwork); +} + static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { - struct task_struct *tsk = current; - - tsk->mems_allowed = *to; + struct cpuset_migrate_mm_work *mwork; - do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); + mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); + if (mwork) { + mwork->mm = mm; + mwork->from = *from; + mwork->to = *to; + INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); + queue_work(cpuset_migrate_mm_wq, &mwork->work); + } else { + mmput(mm); + } +} - rcu_read_lock(); - guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); - rcu_read_unlock(); +void cpuset_post_attach_flush(void) +{ + flush_workqueue(cpuset_migrate_mm_wq); } /* @@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs) mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); - mmput(mm); + else + mmput(mm); } css_task_iter_end(&it); @@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset) * @old_mems_allowed is the right nodesets that we * migrate mm from. */ - if (is_memory_migrate(cs)) { + if (is_memory_migrate(cs)) cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); - } - mmput(mm); + else + mmput(mm); } } @@ -1714,6 +1737,7 @@ out_unlock: mutex_unlock(&cpuset_mutex); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); + flush_workqueue(cpuset_migrate_mm_wq); return retval ?: nbytes; } @@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void) top_cpuset.effective_mems = node_states[N_MEMORY]; register_hotmemory_notifier(&cpuset_track_online_nodes_nb); + + cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); + BUG_ON(!cpuset_migrate_mm_wq); } /** -- cgit v1.2.3 From aa226ff4a1ce79f229c6b7a4c0a14e17fececd01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Jan 2016 15:31:11 -0500 Subject: cgroup: make sure a parent css isn't offlined before its children There are three subsystem callbacks in css shutdown path - css_offline(), css_released() and css_free(). Except for css_released(), cgroup core didn't guarantee the order of invocation. css_offline() or css_free() could be called on a parent css before its children. This behavior is unexpected and led to bugs in cpu and memory controller. This patch updates offline path so that a parent css is never offlined before its children. Each css keeps online_cnt which reaches zero iff itself and all its children are offline and offline_css() is invoked only after online_cnt reaches zero. This fixes the memory controller bug and allows the fix for cpu controller. Signed-off-by: Tejun Heo Reported-and-tested-by: Christian Borntraeger Reported-by: Brian Christiansen Link: http://lkml.kernel.org/g/5698A023.9070703@de.ibm.com Link: http://lkml.kernel.org/g/CAKB58ikDkzc8REt31WBkD99+hxNzjK4+FBmhkgS+NVrC9vjMSg@mail.gmail.com Cc: Heiko Carstens Cc: Peter Zijlstra Cc: stable@vger.kernel.org --- include/linux/cgroup-defs.h | 6 ++++++ kernel/cgroup.c | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7f540f7f588d..789471dba6fb 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -127,6 +127,12 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* + * Incremented by online self and children. Used to guarantee that + * parents are not offlined before their children. + */ + atomic_t online_cnt; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 88abd4d076d8..d01587793865 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4760,6 +4760,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); css->serial_nr = css_serial_nr_next++; + atomic_set(&css->online_cnt, 0); if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); @@ -4782,6 +4783,10 @@ static int online_css(struct cgroup_subsys_state *css) if (!ret) { css->flags |= CSS_ONLINE; rcu_assign_pointer(css->cgroup->subsys[ss->id], css); + + atomic_inc(&css->online_cnt); + if (css->parent) + atomic_inc(&css->parent->online_cnt); } return ret; } @@ -5019,10 +5024,15 @@ static void css_killed_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); mutex_lock(&cgroup_mutex); - offline_css(css); - mutex_unlock(&cgroup_mutex); - css_put(css); + do { + offline_css(css); + css_put(css); + /* @css can't go away while we're holding cgroup_mutex */ + css = css->parent; + } while (css && atomic_dec_and_test(&css->online_cnt)); + + mutex_unlock(&cgroup_mutex); } /* css kill confirmation processing requires process context, bounce */ @@ -5031,8 +5041,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); - INIT_WORK(&css->destroy_work, css_killed_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + if (atomic_dec_and_test(&css->online_cnt)) { + INIT_WORK(&css->destroy_work, css_killed_work_fn); + queue_work(cgroup_destroy_wq, &css->destroy_work); + } } /** -- cgit v1.2.3 From 71f50c6d9a2276f3ec85384bffe2aee1962f4669 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 22 Jan 2016 01:33:51 +0900 Subject: of: drop symbols declared by _OF_DECLARE() from modules The users of this macro (OF_EARLYCON_DECLARE, CLK_OF_DECLARE, IRQCHIP_DECLARE, etc.) are only parsed in the early boot stage. Such symbols contained in modules are never used. This commit fixes the link error introduced by commit b8d20e06eaad ("serial: 8250_uniphier: add earlycon support"); the combination of CONFIG_SERIAL_8250_UNIPHIER=m and CONFIG_SERIAL_8250_CONSOLE=y fails to link: ERROR: "early_serial8250_setup" [drivers/tty/serial/8250/8250_uniphier.ko] undefined! Fixes: b8d20e06eaad ("serial: 8250_uniphier: add earlycon support") Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann Signed-off-by: Rob Herring --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index dd10626a615f..dc6e39696b64 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np) return num; } -#ifdef CONFIG_OF +#if defined(CONFIG_OF) && !defined(MODULE) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section(__##table##_of_table) \ -- cgit v1.2.3 From fb3296335500aaff61333df8eabbccf28761c79d Mon Sep 17 00:00:00 2001 From: Danesh Petigara Date: Mon, 11 Jan 2016 13:22:26 -0800 Subject: drivers: ata: wake port before DMA stop for ALPM The AHCI driver code stops and starts port DMA engines at will without considering the power state of the particular port. The AHCI specification isn't very clear on how to handle this scenario, leaving implementation open to interpretation. Broadcom's STB SATA host controller is unable to handle port DMA controller restarts when the port in question is in low power mode. When a port enters partial or slumber mode, its PHY is powered down. When a controller restart is requested, the controller's internal state machine expects the PHY to be brought back up by software which never happens in this case, resulting in failures. To avoid this situation, logic is added to manually wake up the port just before its DMA engine is stopped, if the port happens to be in a low power state. HBA initiated power management ensures that the port eventually returns to its configured low power state, when the link is idle (as per the conditions listed in the spec). A new host flag is also added to ensure this logic is only exercised for hosts with the above limitation. tj: Formatting changes. Signed-off-by: Danesh Petigara Reviewed-by: Markus Mayer Signed-off-by: Tejun Heo --- drivers/ata/ahci.h | 1 + drivers/ata/ahci_brcmstb.c | 1 + drivers/ata/libahci.c | 23 ++++++++++++++++++++++- include/linux/libata.h | 1 + 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index a4faa438889c..a44c75d4c284 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -250,6 +250,7 @@ enum { AHCI_HFLAG_MULTI_MSI = 0, AHCI_HFLAG_MULTI_MSIX = 0, #endif + AHCI_HFLAG_WAKE_BEFORE_STOP = (1 << 22), /* wake before DMA stop */ /* ap->flags bits */ diff --git a/drivers/ata/ahci_brcmstb.c b/drivers/ata/ahci_brcmstb.c index b36cae2fd04b..e87bcec0fd7c 100644 --- a/drivers/ata/ahci_brcmstb.c +++ b/drivers/ata/ahci_brcmstb.c @@ -317,6 +317,7 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (IS_ERR(hpriv)) return PTR_ERR(hpriv); hpriv->plat_data = priv; + hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP; brcm_sata_alpm_init(hpriv); diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index d61740e78d6d..284a176076f5 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -593,8 +593,22 @@ EXPORT_SYMBOL_GPL(ahci_start_engine); int ahci_stop_engine(struct ata_port *ap) { void __iomem *port_mmio = ahci_port_base(ap); + struct ahci_host_priv *hpriv = ap->host->private_data; u32 tmp; + /* + * On some controllers, stopping a port's DMA engine while the port + * is in ALPM state (partial or slumber) results in failures on + * subsequent DMA engine starts. For those controllers, put the + * port back in active state before stopping its DMA engine. + */ + if ((hpriv->flags & AHCI_HFLAG_WAKE_BEFORE_STOP) && + (ap->link.lpm_policy > ATA_LPM_MAX_POWER) && + ahci_set_lpm(&ap->link, ATA_LPM_MAX_POWER, ATA_LPM_WAKE_ONLY)) { + dev_err(ap->host->dev, "Failed to wake up port before engine stop\n"); + return -EIO; + } + tmp = readl(port_mmio + PORT_CMD); /* check if the HBA is idle */ @@ -689,6 +703,9 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, void __iomem *port_mmio = ahci_port_base(ap); if (policy != ATA_LPM_MAX_POWER) { + /* wakeup flag only applies to the max power policy */ + hints &= ~ATA_LPM_WAKE_ONLY; + /* * Disable interrupts on Phy Ready. This keeps us from * getting woken up due to spurious phy ready @@ -704,7 +721,8 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, u32 cmd = readl(port_mmio + PORT_CMD); if (policy == ATA_LPM_MAX_POWER || !(hints & ATA_LPM_HIPM)) { - cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE); + if (!(hints & ATA_LPM_WAKE_ONLY)) + cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE); cmd |= PORT_CMD_ICC_ACTIVE; writel(cmd, port_mmio + PORT_CMD); @@ -712,6 +730,9 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, /* wait 10ms to be sure we've come out of LPM state */ ata_msleep(ap, 10); + + if (hints & ATA_LPM_WAKE_ONLY) + return 0; } else { cmd |= PORT_CMD_ALPE; if (policy == ATA_LPM_MIN_POWER) diff --git a/include/linux/libata.h b/include/linux/libata.h index 851821bfd553..bec2abbd7ab2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -526,6 +526,7 @@ enum ata_lpm_policy { enum ata_lpm_hints { ATA_LPM_EMPTY = (1 << 0), /* port empty/probing */ ATA_LPM_HIPM = (1 << 1), /* may use HIPM */ + ATA_LPM_WAKE_ONLY = (1 << 2), /* only wake up link */ }; /* forward declarations */ -- cgit v1.2.3 From 23d11a58a9a60dcb52c8fc6494efce908b24c295 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Jan 2016 05:59:46 -0500 Subject: workqueue: skip flush dependency checks for legacy workqueues fca839c00a12 ("workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue") implemented flush dependency warning which triggers if a PF_MEMALLOC task or WQ_MEM_RECLAIM workqueue tries to flush a !WQ_MEM_RECLAIM workquee. This assumes that workqueues marked with WQ_MEM_RECLAIM sit in memory reclaim path and making it depend on something which may need more memory to make forward progress can lead to deadlocks. Unfortunately, workqueues created with the legacy create*_workqueue() interface always have WQ_MEM_RECLAIM regardless of whether they are depended upon memory reclaim or not. These spurious WQ_MEM_RECLAIM markings cause spurious triggering of the flush dependency checks. WARNING: CPU: 0 PID: 6 at kernel/workqueue.c:2361 check_flush_dependency+0x138/0x144() workqueue: WQ_MEM_RECLAIM deferwq:deferred_probe_work_func is flushing !WQ_MEM_RECLAIM events:lru_add_drain_per_cpu ... Workqueue: deferwq deferred_probe_work_func [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x94/0xd4) [] (dump_stack) from [] (warn_slowpath_common+0x80/0xb0) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [] (warn_slowpath_fmt) from [] (check_flush_dependency+0x138/0x144) [] (check_flush_dependency) from [] (flush_work+0x50/0x15c) [] (flush_work) from [] (lru_add_drain_all+0x130/0x180) [] (lru_add_drain_all) from [] (migrate_prep+0x8/0x10) [] (migrate_prep) from [] (alloc_contig_range+0xd8/0x338) [] (alloc_contig_range) from [] (cma_alloc+0xe0/0x1ac) [] (cma_alloc) from [] (__alloc_from_contiguous+0x38/0xd8) [] (__alloc_from_contiguous) from [] (__dma_alloc+0x240/0x278) [] (__dma_alloc) from [] (arm_dma_alloc+0x54/0x5c) [] (arm_dma_alloc) from [] (dmam_alloc_coherent+0xc0/0xec) [] (dmam_alloc_coherent) from [] (ahci_port_start+0x150/0x1dc) [] (ahci_port_start) from [] (ata_host_start.part.3+0xc8/0x1c8) [] (ata_host_start.part.3) from [] (ata_host_activate+0x50/0x148) [] (ata_host_activate) from [] (ahci_host_activate+0x44/0x114) [] (ahci_host_activate) from [] (ahci_platform_init_host+0x1d8/0x3c8) [] (ahci_platform_init_host) from [] (tegra_ahci_probe+0x448/0x4e8) [] (tegra_ahci_probe) from [] (platform_drv_probe+0x50/0xac) [] (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) [] (driver_probe_device) from [] (bus_for_each_drv+0x60/0x94) [] (bus_for_each_drv) from [] (__device_attach+0xb0/0x114) [] (__device_attach) from [] (bus_probe_device+0x84/0x8c) [] (bus_probe_device) from [] (deferred_probe_work_func+0x68/0x98) [] (deferred_probe_work_func) from [] (process_one_work+0x120/0x3f8) [] (process_one_work) from [] (worker_thread+0x38/0x55c) [] (worker_thread) from [] (kthread+0xdc/0xf4) [] (kthread) from [] (ret_from_fork+0x14/0x3c) Fix it by marking workqueues created via create*_workqueue() with __WQ_LEGACY and disabling flush dependency checks on them. Signed-off-by: Tejun Heo Reported-and-tested-by: Thierry Reding Link: http://lkml.kernel.org/g/20160126173843.GA11115@ulmo.nvidia.com Fixes: fca839c00a12 ("workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue") --- include/linux/workqueue.h | 9 +++++---- kernel/workqueue.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0e32bc71245e..ca73c503b92a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -311,6 +311,7 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ - alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name)) + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) #define create_freezable_workqueue(name) \ - alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \ - 1, (name)) + alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ + WQ_MEM_RECLAIM, 1, (name)) #define create_singlethread_workqueue(name) \ - alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name) + alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 61a0264e28f9..dc7faad63646 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2355,7 +2355,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq, WARN_ONCE(current->flags & PF_MEMALLOC, "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf", current->pid, current->comm, target_wq->name, target_func); - WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM), + WARN_ONCE(worker && ((worker->current_pwq->wq->flags & + (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM), "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf", worker->current_pwq->wq->name, worker->current_func, target_wq->name, target_func); -- cgit v1.2.3 From 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 18 Jan 2016 14:09:27 -0600 Subject: target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors In a couple places we are not converting to/from the Linux block layer 512 bytes sectors. 1. The request queue values and what we do are a mismatch of things: max_discard_sectors - This is in linux block layer 512 byte sectors. We are just copying this to max_unmap_lba_count. discard_granularity - This is in bytes. We are converting it to Linux block layer 512 byte sectors. discard_alignment - This is in bytes. We are just copying this over. The problem is that the core LIO code exports these values in spc_emulate_evpd_b0 and we use them to test request arguments in sbc_execute_unmap, but we never convert to the block size we export to the initiator. If we are not using 512 byte sectors then we are exporting the wrong values or are checks are off. And, for the discard_alignment/bytes case we are just plain messed up. 2. blkdev_issue_discard's start and number of sector arguments are supposed to be in linux block layer 512 byte sectors. We are currently passing in the values we get from the initiator which might be based on some other sector size. There is a similar problem in iblock_execute_write_same where the bio functions want values in 512 byte sectors but we are passing in what we got from the initiator. Signed-off-by: Mike Christie Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 44 +++++++++++++++++++++++++++ drivers/target/target_core_file.c | 29 ++++++------------ drivers/target/target_core_iblock.c | 58 +++++++++--------------------------- include/target/target_core_backend.h | 3 ++ 4 files changed, 70 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index cacd97a8cbd0..da457e25717a 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -828,6 +828,50 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) return dev; } +/* + * Check if the underlying struct block_device request_queue supports + * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM + * in ATA and we need to set TPE=1 + */ +bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, + struct request_queue *q, int block_size) +{ + if (!blk_queue_discard(q)) + return false; + + attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) / + block_size; + /* + * Currently hardcoded to 1 in Linux/SCSI code.. + */ + attrib->max_unmap_block_desc_count = 1; + attrib->unmap_granularity = q->limits.discard_granularity / block_size; + attrib->unmap_granularity_alignment = q->limits.discard_alignment / + block_size; + attrib->unmap_zeroes_data = q->limits.discard_zeroes_data; + return true; +} +EXPORT_SYMBOL(target_configure_unmap_from_queue); + +/* + * Convert from blocksize advertised to the initiator to the 512 byte + * units unconditionally used by the Linux block layer. + */ +sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) +{ + switch (dev->dev_attrib.block_size) { + case 4096: + return lb << 3; + case 2048: + return lb << 2; + case 1024: + return lb << 1; + default: + return lb; + } +} +EXPORT_SYMBOL(target_to_linux_sector); + int target_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index e3195700211a..75f0f08b2a34 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -160,25 +160,11 @@ static int fd_configure_device(struct se_device *dev) " block_device blocks: %llu logical_block_size: %d\n", dev_size, div_u64(dev_size, fd_dev->fd_block_size), fd_dev->fd_block_size); - /* - * Check if the underlying struct block_device request_queue supports - * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM - * in ATA and we need to set TPE=1 - */ - if (blk_queue_discard(q)) { - dev->dev_attrib.max_unmap_lba_count = - q->limits.max_discard_sectors; - /* - * Currently hardcoded to 1 in Linux/SCSI code.. - */ - dev->dev_attrib.max_unmap_block_desc_count = 1; - dev->dev_attrib.unmap_granularity = - q->limits.discard_granularity >> 9; - dev->dev_attrib.unmap_granularity_alignment = - q->limits.discard_alignment; + + if (target_configure_unmap_from_queue(&dev->dev_attrib, q, + fd_dev->fd_block_size)) pr_debug("IFILE: BLOCK Discard support available," - " disabled by default\n"); - } + " disabled by default\n"); /* * Enable write same emulation for IBLOCK and use 0xFFFF as * the smaller WRITE_SAME(10) only has a two-byte block count. @@ -490,9 +476,12 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) if (S_ISBLK(inode->i_mode)) { /* The backend is block device, use discard */ struct block_device *bdev = inode->i_bdev; + struct se_device *dev = cmd->se_dev; - ret = blkdev_issue_discard(bdev, lba, - nolb, GFP_KERNEL, 0); + ret = blkdev_issue_discard(bdev, + target_to_linux_sector(dev, lba), + target_to_linux_sector(dev, nolb), + GFP_KERNEL, 0); if (ret < 0) { pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n", ret); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 5a2899f9f50b..abe4eb997a84 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -121,29 +121,11 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; - /* - * Check if the underlying struct block_device request_queue supports - * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM - * in ATA and we need to set TPE=1 - */ - if (blk_queue_discard(q)) { - dev->dev_attrib.max_unmap_lba_count = - q->limits.max_discard_sectors; - - /* - * Currently hardcoded to 1 in Linux/SCSI code.. - */ - dev->dev_attrib.max_unmap_block_desc_count = 1; - dev->dev_attrib.unmap_granularity = - q->limits.discard_granularity >> 9; - dev->dev_attrib.unmap_granularity_alignment = - q->limits.discard_alignment; - dev->dev_attrib.unmap_zeroes_data = - q->limits.discard_zeroes_data; - + if (target_configure_unmap_from_queue(&dev->dev_attrib, q, + dev->dev_attrib.hw_block_size)) pr_debug("IBLOCK: BLOCK Discard support available," - " disabled by default\n"); - } + " disabled by default\n"); + /* * Enable write same emulation for IBLOCK and use 0xFFFF as * the smaller WRITE_SAME(10) only has a two-byte block count. @@ -415,9 +397,13 @@ static sense_reason_t iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) { struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; + struct se_device *dev = cmd->se_dev; int ret; - ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0); + ret = blkdev_issue_discard(bdev, + target_to_linux_sector(dev, lba), + target_to_linux_sector(dev, nolb), + GFP_KERNEL, 0); if (ret < 0) { pr_err("blkdev_issue_discard() failed: %d\n", ret); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -433,8 +419,10 @@ iblock_execute_write_same(struct se_cmd *cmd) struct scatterlist *sg; struct bio *bio; struct bio_list list; - sector_t block_lba = cmd->t_task_lba; - sector_t sectors = sbc_get_write_same_sectors(cmd); + struct se_device *dev = cmd->se_dev; + sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); + sector_t sectors = target_to_linux_sector(dev, + sbc_get_write_same_sectors(cmd)); if (cmd->prot_op) { pr_err("WRITE_SAME: Protection information with IBLOCK" @@ -648,12 +636,12 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { struct se_device *dev = cmd->se_dev; + sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); struct iblock_req *ibr; struct bio *bio, *bio_start; struct bio_list list; struct scatterlist *sg; u32 sg_num = sgl_nents; - sector_t block_lba; unsigned bio_cnt; int rw = 0; int i; @@ -679,24 +667,6 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, rw = READ; } - /* - * Convert the blocksize advertised to the initiator to the 512 byte - * units unconditionally used by the Linux block layer. - */ - if (dev->dev_attrib.block_size == 4096) - block_lba = (cmd->t_task_lba << 3); - else if (dev->dev_attrib.block_size == 2048) - block_lba = (cmd->t_task_lba << 2); - else if (dev->dev_attrib.block_size == 1024) - block_lba = (cmd->t_task_lba << 1); - else if (dev->dev_attrib.block_size == 512) - block_lba = cmd->t_task_lba; - else { - pr_err("Unsupported SCSI -> BLOCK LBA conversion:" - " %u\n", dev->dev_attrib.block_size); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } - ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL); if (!ibr) goto fail; diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 56cf8e485ef2..28ee5c2e6bcd 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -94,5 +94,8 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); bool target_sense_desc_format(struct se_device *dev); +sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); +bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, + struct request_queue *q, int block_size); #endif /* TARGET_CORE_BACKEND_H */ -- cgit v1.2.3 From 0f26922fe5dc5724b1adbbd54b21bad03590b4f3 Mon Sep 17 00:00:00 2001 From: zengtao Date: Tue, 2 Feb 2016 11:38:34 +0800 Subject: cputime: Prevent 32bit overflow in time[val|spec]_to_cputime() The datatype __kernel_time_t is u32 on 32bit platform, so its subject to overflows in the timeval/timespec to cputime conversion. Currently the following functions are affected: 1. setitimer() 2. timer_create/timer_settime() 3. sys_clock_nanosleep This can happen on MIPS32 and ARM32 with "Full dynticks CPU time accounting" enabled, which is required for CONFIG_NO_HZ_FULL. Enforce u64 conversion to prevent the overflow. Fixes: 31c1fc818715 ("ARM: Kconfig: allow full nohz CPU accounting") Signed-off-by: zengtao Reviewed-by: Arnd Bergmann Cc: Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1454384314-154784-1-git-send-email-prime.zeng@huawei.com Signed-off-by: Thomas Gleixner --- include/asm-generic/cputime_nsecs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 0419485891f2..0f1c6f315cdc 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -75,7 +75,7 @@ typedef u64 __nocast cputime64_t; */ static inline cputime_t timespec_to_cputime(const struct timespec *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec; return (__force cputime_t) ret; } static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) @@ -91,7 +91,8 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) */ static inline cputime_t timeval_to_cputime(const struct timeval *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + + val->tv_usec * NSEC_PER_USEC; return (__force cputime_t) ret; } static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) -- cgit v1.2.3 From 29569941688cdf647f953b2eb073aa6ec9dd3fc1 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 28 Jan 2016 16:33:50 +0000 Subject: clk: tegra: Add the APB2APE audio clock on Tegra210 The APB2APE clock for the audio subsystem is required for powering up the audio power domain and accessing the various modules in this subsystem on Tegra210 devices. Add this clock for Tegra210. Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-id.h | 1 + drivers/clk/tegra/clk-tegra-periph.c | 1 + drivers/clk/tegra/clk-tegra210.c | 1 + include/dt-bindings/clock/tegra210-car.h | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h index 19ce0738ee76..62ea38187b71 100644 --- a/drivers/clk/tegra/clk-id.h +++ b/drivers/clk/tegra/clk-id.h @@ -11,6 +11,7 @@ enum clk_id { tegra_clk_afi, tegra_clk_amx, tegra_clk_amx1, + tegra_clk_apb2ape, tegra_clk_apbdma, tegra_clk_apbif, tegra_clk_ape, diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 1860df1862dd..ea2b9cbf9e70 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -829,6 +829,7 @@ static struct tegra_periph_init_data gate_clks[] = { GATE("xusb_gate", "osc", 143, 0, tegra_clk_xusb_gate, 0), GATE("pll_p_out_cpu", "pll_p", 223, 0, tegra_clk_pll_p_out_cpu, 0), GATE("pll_p_out_adsp", "pll_p", 187, 0, tegra_clk_pll_p_out_adsp, 0), + GATE("apb2ape", "clk_m", 107, 0, tegra_clk_apb2ape, 0), }; static struct tegra_periph_init_data div_clks[] = { diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index ffcb86a667d9..14c1841eb29b 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -2204,6 +2204,7 @@ static struct tegra_clk tegra210_clks[tegra_clk_max] __initdata = { [tegra_clk_pll_c4_out1] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT1, .present = true }, [tegra_clk_pll_c4_out2] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT2, .present = true }, [tegra_clk_pll_c4_out3] = { .dt_id = TEGRA210_CLK_PLL_C4_OUT3, .present = true }, + [tegra_clk_apb2ape] = { .dt_id = TEGRA210_CLK_APB2APE, .present = true }, }; static struct tegra_devclk devclks[] __initdata = { diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h index 6f45aea49e4f..0a05b0d36ae7 100644 --- a/include/dt-bindings/clock/tegra210-car.h +++ b/include/dt-bindings/clock/tegra210-car.h @@ -126,7 +126,7 @@ /* 104 */ /* 105 */ #define TEGRA210_CLK_D_AUDIO 106 -/* 107 ( affects abp -> ape) */ +#define TEGRA210_CLK_APB2APE 107 /* 108 */ /* 109 */ /* 110 */ -- cgit v1.2.3 From 4b0e4e4af6c6dc8354dcb72182d52c1bc55f12fc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 30 Jan 2016 07:59:32 +0200 Subject: drm: add helper to check for wc memory support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie Signed-off-by: Oded Gabbay Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- include/drm/drm_cache.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 7bfb063029d8..461a0558bca4 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,4 +35,13 @@ void drm_clflush_pages(struct page *pages[], unsigned long num_pages); +static inline bool drm_arch_can_wc_memory(void) +{ +#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) + return false; +#else + return true; +#endif +} + #endif -- cgit v1.2.3 From 2db8f9a1d86aa32a9cbf1a975882b4fa162f040f Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 26 Jan 2016 12:43:00 -0600 Subject: ACPI / CPPC: remove redundant mbox_send_message() declaration Remove a redundant function declaration in cppc_acpi.h for mbox_send_message(). That function is defined in mailbox_client.h, which is already included. Signed-off-by: Timur Tabi Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 717a29810473..dad8af3ebeb5 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -133,6 +133,5 @@ extern int acpi_get_psd_map(struct cpudata **); /* Methods to interact with the PCC mailbox controller. */ extern struct mbox_chan * pcc_mbox_request_channel(struct mbox_client *, unsigned int); -extern int mbox_send_message(struct mbox_chan *chan, void *mssg); #endif /* _CPPC_ACPI_H*/ -- cgit v1.2.3 From 8244062ef1e54502ef55f54cced659913f244c3e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 3 Feb 2016 16:55:26 +1030 Subject: modules: fix longstanding /proc/kallsyms vs module insertion race. For CONFIG_KALLSYMS, we keep two symbol tables and two string tables. There's one full copy, marked SHF_ALLOC and laid out at the end of the module's init section. There's also a cut-down version that only contains core symbols and strings, and lives in the module's core section. After module init (and before we free the module memory), we switch the mod->symtab, mod->num_symtab and mod->strtab to point to the core versions. We do this under the module_mutex. However, kallsyms doesn't take the module_mutex: it uses preempt_disable() and rcu tricks to walk through the modules, because it's used in the oops path. It's also used in /proc/kallsyms. There's nothing atomic about the change of these variables, so we can get the old (larger!) num_symtab and the new symtab pointer; in fact this is what I saw when trying to reproduce. By grouping these variables together, we can use a carefully-dereferenced pointer to ensure we always get one or the other (the free of the module init section is already done in an RCU callback, so that's safe). We allocate the init one at the end of the module init section, and keep the core one inside the struct module itself (it could also have been allocated at the end of the module core, but that's probably overkill). Reported-by: Weilong Chen Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541 Cc: stable@kernel.org Signed-off-by: Rusty Russell --- include/linux/module.h | 19 +++++---- kernel/module.c | 112 ++++++++++++++++++++++++++++++------------------- 2 files changed, 79 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 4560d8f1545d..2bb0c3085706 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -324,6 +324,12 @@ struct module_layout { #define __module_layout_align #endif +struct mod_kallsyms { + Elf_Sym *symtab; + unsigned int num_symtab; + char *strtab; +}; + struct module { enum module_state state; @@ -405,15 +411,10 @@ struct module { #endif #ifdef CONFIG_KALLSYMS - /* - * We keep the symbol and string tables for kallsyms. - * The core_* fields below are temporary, loader-only (they - * could really be discarded after module init). - */ - Elf_Sym *symtab, *core_symtab; - unsigned int num_symtab, core_num_syms; - char *strtab, *core_strtab; - + /* Protected by RCU and/or module_mutex: use rcu_dereference() */ + struct mod_kallsyms *kallsyms; + struct mod_kallsyms core_kallsyms; + /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/kernel/module.c b/kernel/module.c index 1e79d8157712..9537da37ce87 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -303,6 +303,9 @@ struct load_info { struct _ddebug *debug; unsigned int num_debug; bool sig_ok; +#ifdef CONFIG_KALLSYMS + unsigned long mod_kallsyms_init_off; +#endif struct { unsigned int sym, str, mod, vers, info, pcpu; } index; @@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct load_info *info) strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect, info->index.str) | INIT_OFFSET_MASK; - mod->init_layout.size = debug_align(mod->init_layout.size); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); + + /* We'll tack temporary mod_kallsyms on the end. */ + mod->init_layout.size = ALIGN(mod->init_layout.size, + __alignof__(struct mod_kallsyms)); + info->mod_kallsyms_init_off = mod->init_layout.size; + mod->init_layout.size += sizeof(struct mod_kallsyms); + mod->init_layout.size = debug_align(mod->init_layout.size); } +/* + * We use the full symtab and strtab which layout_symtab arranged to + * be appended to the init section. Later we switch to the cut-down + * core-only ones. + */ static void add_kallsyms(struct module *mod, const struct load_info *info) { unsigned int i, ndst; @@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) char *s; Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; - mod->symtab = (void *)symsec->sh_addr; - mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + /* Set up to point into init section. */ + mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off; + + mod->kallsyms->symtab = (void *)symsec->sh_addr; + mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ - mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr; + mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; /* Set types up while we still have access to sections. */ - for (i = 0; i < mod->num_symtab; i++) - mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); - - mod->core_symtab = dst = mod->core_layout.base + info->symoffs; - mod->core_strtab = s = mod->core_layout.base + info->stroffs; - src = mod->symtab; - for (ndst = i = 0; i < mod->num_symtab; i++) { + for (i = 0; i < mod->kallsyms->num_symtab; i++) + mod->kallsyms->symtab[i].st_info + = elf_type(&mod->kallsyms->symtab[i], info); + + /* Now populate the cut down core kallsyms for after init. */ + mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs; + mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs; + src = mod->kallsyms->symtab; + for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { if (i == 0 || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { dst[ndst] = src[i]; - dst[ndst++].st_name = s - mod->core_strtab; - s += strlcpy(s, &mod->strtab[src[i].st_name], + dst[ndst++].st_name = s - mod->core_kallsyms.strtab; + s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name], KSYM_NAME_LEN) + 1; } } - mod->core_num_syms = ndst; + mod->core_kallsyms.num_symtab = ndst; } #else static inline void layout_symtab(struct module *mod, struct load_info *info) @@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod) module_put(mod); trim_init_extable(mod); #ifdef CONFIG_KALLSYMS - mod->num_symtab = mod->core_num_syms; - mod->symtab = mod->core_symtab; - mod->strtab = mod->core_strtab; + /* Switch to core kallsyms now init is done: kallsyms may be walking! */ + rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif mod_tree_remove_init(mod); disable_ro_nx(&mod->init_layout); @@ -3627,9 +3645,9 @@ static inline int is_arm_mapping_symbol(const char *str) && (str[2] == '\0' || str[2] == '.'); } -static const char *symname(struct module *mod, unsigned int symnum) +static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum) { - return mod->strtab + mod->symtab[symnum].st_name; + return kallsyms->strtab + kallsyms->symtab[symnum].st_name; } static const char *get_ksymbol(struct module *mod, @@ -3639,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod, { unsigned int i, best = 0; unsigned long nextval; + struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) @@ -3648,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod, /* Scan for closest preceding symbol, and next symbol. (ELF starts real symbols at 1). */ - for (i = 1; i < mod->num_symtab; i++) { - if (mod->symtab[i].st_shndx == SHN_UNDEF) + for (i = 1; i < kallsyms->num_symtab; i++) { + if (kallsyms->symtab[i].st_shndx == SHN_UNDEF) continue; /* We ignore unnamed symbols: they're uninformative * and inserted at a whim. */ - if (*symname(mod, i) == '\0' - || is_arm_mapping_symbol(symname(mod, i))) + if (*symname(kallsyms, i) == '\0' + || is_arm_mapping_symbol(symname(kallsyms, i))) continue; - if (mod->symtab[i].st_value <= addr - && mod->symtab[i].st_value > mod->symtab[best].st_value) + if (kallsyms->symtab[i].st_value <= addr + && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value) best = i; - if (mod->symtab[i].st_value > addr - && mod->symtab[i].st_value < nextval) - nextval = mod->symtab[i].st_value; + if (kallsyms->symtab[i].st_value > addr + && kallsyms->symtab[i].st_value < nextval) + nextval = kallsyms->symtab[i].st_value; } if (!best) return NULL; if (size) - *size = nextval - mod->symtab[best].st_value; + *size = nextval - kallsyms->symtab[best].st_value; if (offset) - *offset = addr - mod->symtab[best].st_value; - return symname(mod, best); + *offset = addr - kallsyms->symtab[best].st_value; + return symname(kallsyms, best); } /* For kallsyms to ask for address resolution. NULL means not found. Careful @@ -3763,18 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + struct mod_kallsyms *kallsyms; + if (mod->state == MODULE_STATE_UNFORMED) continue; - if (symnum < mod->num_symtab) { - *value = mod->symtab[symnum].st_value; - *type = mod->symtab[symnum].st_info; - strlcpy(name, symname(mod, symnum), KSYM_NAME_LEN); + kallsyms = rcu_dereference_sched(mod->kallsyms); + if (symnum < kallsyms->num_symtab) { + *value = kallsyms->symtab[symnum].st_value; + *type = kallsyms->symtab[symnum].st_info; + strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } - symnum -= mod->num_symtab; + symnum -= kallsyms->num_symtab; } preempt_enable(); return -ERANGE; @@ -3783,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static unsigned long mod_find_symname(struct module *mod, const char *name) { unsigned int i; + struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); - for (i = 0; i < mod->num_symtab; i++) - if (strcmp(name, symname(mod, i)) == 0 && - mod->symtab[i].st_info != 'U') - return mod->symtab[i].st_value; + for (i = 0; i < kallsyms->num_symtab; i++) + if (strcmp(name, symname(kallsyms, i)) == 0 && + kallsyms->symtab[i].st_info != 'U') + return kallsyms->symtab[i].st_value; return 0; } @@ -3826,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, module_assert_mutex(); list_for_each_entry(mod, &modules, list) { + /* We hold module_mutex: no need for rcu_dereference_sched */ + struct mod_kallsyms *kallsyms = mod->kallsyms; + if (mod->state == MODULE_STATE_UNFORMED) continue; - for (i = 0; i < mod->num_symtab; i++) { - ret = fn(data, symname(mod, i), - mod, mod->symtab[i].st_value); + for (i = 0; i < kallsyms->num_symtab; i++) { + ret = fn(data, symname(kallsyms, i), + mod, kallsyms->symtab[i].st_value); if (ret != 0) return ret; } -- cgit v1.2.3 From 06ab30034ed9c200a570ab13c017bde248ddb2a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 31 Jan 2016 11:57:41 +0100 Subject: ALSA: rawmidi: Make snd_rawmidi_transmit() race-free A kernel WARNING in snd_rawmidi_transmit_ack() is triggered by syzkaller fuzzer: WARNING: CPU: 1 PID: 20739 at sound/core/rawmidi.c:1136 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:515 [] snd_rawmidi_transmit_ack+0x275/0x400 sound/core/rawmidi.c:1136 [] snd_virmidi_output_trigger+0x4b1/0x5a0 sound/core/seq/seq_virmidi.c:163 [< inline >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150 [] snd_rawmidi_kernel_write1+0x549/0x780 sound/core/rawmidi.c:1223 [] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1273 [] __vfs_write+0x113/0x480 fs/read_write.c:528 [] vfs_write+0x167/0x4a0 fs/read_write.c:577 [< inline >] SYSC_write fs/read_write.c:624 [] SyS_write+0x111/0x220 fs/read_write.c:616 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Also a similar warning is found but in another path: Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:515 [] rawmidi_transmit_ack+0x24a/0x3b0 sound/core/rawmidi.c:1133 [] snd_rawmidi_transmit_ack+0x51/0x80 sound/core/rawmidi.c:1163 [] snd_virmidi_output_trigger+0x2b6/0x570 sound/core/seq/seq_virmidi.c:185 [< inline >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150 [] snd_rawmidi_kernel_write1+0x4bb/0x760 sound/core/rawmidi.c:1252 [] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1302 [] __vfs_write+0x113/0x480 fs/read_write.c:528 [] vfs_write+0x167/0x4a0 fs/read_write.c:577 [< inline >] SYSC_write fs/read_write.c:624 [] SyS_write+0x111/0x220 fs/read_write.c:616 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 In the former case, the reason is that virmidi has an open code calling snd_rawmidi_transmit_ack() with the value calculated outside the spinlock. We may use snd_rawmidi_transmit() in a loop just for consuming the input data, but even there, there is a race between snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack(). Similarly in the latter case, it calls snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack() separately without protection, so they are racy as well. The patch tries to address these issues by the following ways: - Introduce the unlocked versions of snd_rawmidi_transmit_peek() and snd_rawmidi_transmit_ack() to be called inside the explicit lock. - Rewrite snd_rawmidi_transmit() to be race-free (the former case). - Make the split calls (the latter case) protected in the rawmidi spin lock. BugLink: http://lkml.kernel.org/r/CACT4Y+YPq1+cYLkadwjWa5XjzF1_Vki1eHnVn-Lm0hzhSpu5PA@mail.gmail.com BugLink: http://lkml.kernel.org/r/CACT4Y+acG4iyphdOZx47Nyq_VHGbpJQK-6xNpiqUjaZYqsXOGw@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 4 ++ sound/core/rawmidi.c | 98 ++++++++++++++++++++++++++++++++------------ sound/core/seq/seq_virmidi.c | 17 +++++--- 3 files changed, 88 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index fdabbb4ddba9..f730b91e472f 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count); +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count); +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, + int count); /* main midi functions */ diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index f75d1656272c..26ca02248885 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1055,23 +1055,16 @@ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream) EXPORT_SYMBOL(snd_rawmidi_transmit_empty); /** - * snd_rawmidi_transmit_peek - copy data from the internal buffer + * __snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * - * Copies data from the internal output buffer to the given buffer. - * - * Call this in the interrupt handler when the midi output is ready, - * and call snd_rawmidi_transmit_ack() after the transmission is - * finished. - * - * Return: The size of copied data, or a negative error code on failure. + * This is a variant of snd_rawmidi_transmit_peek() without spinlock. */ -int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { - unsigned long flags; int result, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; @@ -1081,7 +1074,6 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, return -EINVAL; } result = 0; - spin_lock_irqsave(&runtime->lock, flags); if (runtime->avail >= runtime->buffer_size) { /* warning: lowlevel layer MUST trigger down the hardware */ goto __skip; @@ -1106,25 +1098,47 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, } } __skip: + return result; +} +EXPORT_SYMBOL(__snd_rawmidi_transmit_peek); + +/** + * snd_rawmidi_transmit_peek - copy data from the internal buffer + * @substream: the rawmidi substream + * @buffer: the buffer pointer + * @count: data size to transfer + * + * Copies data from the internal output buffer to the given buffer. + * + * Call this in the interrupt handler when the midi output is ready, + * and call snd_rawmidi_transmit_ack() after the transmission is + * finished. + * + * Return: The size of copied data, or a negative error code on failure. + */ +int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count) +{ + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); + result = __snd_rawmidi_transmit_peek(substream, buffer, count); spin_unlock_irqrestore(&runtime->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit_peek); /** - * snd_rawmidi_transmit_ack - acknowledge the transmission + * __snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * - * Advances the hardware pointer for the internal output buffer with - * the given size and updates the condition. - * Call after the transmission is finished. - * - * Return: The advanced size if successful, or a negative error code on failure. + * This is a variant of __snd_rawmidi_transmit_ack() without spinlock. */ -int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { - unsigned long flags; struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { @@ -1132,7 +1146,6 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) "snd_rawmidi_transmit_ack: output is not active!!!\n"); return -EINVAL; } - spin_lock_irqsave(&runtime->lock, flags); snd_BUG_ON(runtime->avail + count > runtime->buffer_size); runtime->hw_ptr += count; runtime->hw_ptr %= runtime->buffer_size; @@ -1142,9 +1155,32 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) if (runtime->drain || snd_rawmidi_ready(substream)) wake_up(&runtime->sleep); } - spin_unlock_irqrestore(&runtime->lock, flags); return count; } +EXPORT_SYMBOL(__snd_rawmidi_transmit_ack); + +/** + * snd_rawmidi_transmit_ack - acknowledge the transmission + * @substream: the rawmidi substream + * @count: the transferred count + * + * Advances the hardware pointer for the internal output buffer with + * the given size and updates the condition. + * Call after the transmission is finished. + * + * Return: The advanced size if successful, or a negative error code on failure. + */ +int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) +{ + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); + result = __snd_rawmidi_transmit_ack(substream, count); + spin_unlock_irqrestore(&runtime->lock, flags); + return result; +} EXPORT_SYMBOL(snd_rawmidi_transmit_ack); /** @@ -1160,12 +1196,22 @@ EXPORT_SYMBOL(snd_rawmidi_transmit_ack); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); if (!substream->opened) - return -EBADFD; - count = snd_rawmidi_transmit_peek(substream, buffer, count); - if (count < 0) - return count; - return snd_rawmidi_transmit_ack(substream, count); + result = -EBADFD; + else { + count = __snd_rawmidi_transmit_peek(substream, buffer, count); + if (count <= 0) + result = count; + else + result = __snd_rawmidi_transmit_ack(substream, count); + } + spin_unlock_irqrestore(&runtime->lock, flags); + return result; } EXPORT_SYMBOL(snd_rawmidi_transmit); diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index f71aedfb408c..c82ed3e70506 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -155,21 +155,26 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, struct snd_virmidi *vmidi = substream->runtime->private_data; int count, res; unsigned char buf[32], *pbuf; + unsigned long flags; if (up) { vmidi->trigger = 1; if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH && !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) { - snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail); - return; /* ignored */ + while (snd_rawmidi_transmit(substream, buf, + sizeof(buf)) > 0) { + /* ignored */ + } + return; } if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) { if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0) return; vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } + spin_lock_irqsave(&substream->runtime->lock, flags); while (1) { - count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf)); + count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf)); if (count <= 0) break; pbuf = buf; @@ -179,16 +184,18 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, snd_midi_event_reset_encode(vmidi->parser); continue; } - snd_rawmidi_transmit_ack(substream, res); + __snd_rawmidi_transmit_ack(substream, res); pbuf += res; count -= res; if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) { if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0) - return; + goto out; vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } } } + out: + spin_unlock_irqrestore(&substream->runtime->lock, flags); } else { vmidi->trigger = 0; } -- cgit v1.2.3 From a3d0a918502cc73af4f60da2cc4c5cac5573f183 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 2 Feb 2016 16:57:08 -0800 Subject: thp: make split_queue per-node Andrea Arcangeli suggested to make split queue per-node to improve scalability. Let's do it. Signed-off-by: Kirill A. Shutemov Suggested-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Vlastimil Babka Cc: "Aneesh Kumar K.V" Cc: Johannes Weiner Cc: Michal Hocko Cc: Jerome Marchand Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++++ mm/huge_memory.c | 49 ++++++++++++++++++++++++++----------------------- mm/page_alloc.c | 5 +++++ 3 files changed, 37 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 33bb1b19273e..7b6c2cfee390 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -682,6 +682,12 @@ typedef struct pglist_data { */ unsigned long first_deferred_pfn; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + spinlock_t split_queue_lock; + struct list_head split_queue; + unsigned long split_queue_len; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fd3a07b3e6f4..253a25e007d7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = { .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), }; -static DEFINE_SPINLOCK(split_queue_lock); -static LIST_HEAD(split_queue); -static unsigned long split_queue_len; static struct shrinker deferred_split_shrinker; static void set_recommended_min_free_kbytes(void) @@ -3358,6 +3355,7 @@ int total_mapcount(struct page *page) int split_huge_page_to_list(struct page *page, struct list_head *list) { struct page *head = compound_head(page); + struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); struct anon_vma *anon_vma; int count, mapcount, ret; bool mlocked; @@ -3401,19 +3399,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) lru_add_drain(); /* Prevent deferred_split_scan() touching ->_count */ - spin_lock_irqsave(&split_queue_lock, flags); + spin_lock_irqsave(&pgdata->split_queue_lock, flags); count = page_count(head); mapcount = total_mapcount(head); if (!mapcount && count == 1) { if (!list_empty(page_deferred_list(head))) { - split_queue_len--; + pgdata->split_queue_len--; list_del(page_deferred_list(head)); } - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); __split_huge_page(page, list); ret = 0; } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); pr_alert("total_mapcount: %u, page_count(): %u\n", mapcount, count); if (PageTail(page)) @@ -3421,7 +3419,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) dump_page(page, "total_mapcount(head) > 0"); BUG(); } else { - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); unfreeze_page(anon_vma, head); ret = -EBUSY; } @@ -3436,52 +3434,56 @@ out: void free_transhuge_page(struct page *page) { + struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); unsigned long flags; - spin_lock_irqsave(&split_queue_lock, flags); + spin_lock_irqsave(&pgdata->split_queue_lock, flags); if (!list_empty(page_deferred_list(page))) { - split_queue_len--; + pgdata->split_queue_len--; list_del(page_deferred_list(page)); } - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); free_compound_page(page); } void deferred_split_huge_page(struct page *page) { + struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); unsigned long flags; VM_BUG_ON_PAGE(!PageTransHuge(page), page); - spin_lock_irqsave(&split_queue_lock, flags); + spin_lock_irqsave(&pgdata->split_queue_lock, flags); if (list_empty(page_deferred_list(page))) { - list_add_tail(page_deferred_list(page), &split_queue); - split_queue_len++; + list_add_tail(page_deferred_list(page), &pgdata->split_queue); + pgdata->split_queue_len++; } - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); } static unsigned long deferred_split_count(struct shrinker *shrink, struct shrink_control *sc) { + struct pglist_data *pgdata = NODE_DATA(sc->nid); /* * Split a page from split_queue will free up at least one page, * at most HPAGE_PMD_NR - 1. We don't track exact number. * Let's use HPAGE_PMD_NR / 2 as ballpark. */ - return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2; + return ACCESS_ONCE(pgdata->split_queue_len) * HPAGE_PMD_NR / 2; } static unsigned long deferred_split_scan(struct shrinker *shrink, struct shrink_control *sc) { + struct pglist_data *pgdata = NODE_DATA(sc->nid); unsigned long flags; LIST_HEAD(list), *pos, *next; struct page *page; int split = 0; - spin_lock_irqsave(&split_queue_lock, flags); - list_splice_init(&split_queue, &list); + spin_lock_irqsave(&pgdata->split_queue_lock, flags); + list_splice_init(&pgdata->split_queue, &list); /* Take pin on all head pages to avoid freeing them under us */ list_for_each_safe(pos, next, &list) { @@ -3490,10 +3492,10 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, /* race with put_compound_page() */ if (!get_page_unless_zero(page)) { list_del_init(page_deferred_list(page)); - split_queue_len--; + pgdata->split_queue_len--; } } - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); list_for_each_safe(pos, next, &list) { page = list_entry((void *)pos, struct page, mapping); @@ -3505,9 +3507,9 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, put_page(page); } - spin_lock_irqsave(&split_queue_lock, flags); - list_splice_tail(&list, &split_queue); - spin_unlock_irqrestore(&split_queue_lock, flags); + spin_lock_irqsave(&pgdata->split_queue_lock, flags); + list_splice_tail(&list, &pgdata->split_queue); + spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); return split * HPAGE_PMD_NR / 2; } @@ -3516,6 +3518,7 @@ static struct shrinker deferred_split_shrinker = { .count_objects = deferred_split_count, .scan_objects = deferred_split_scan, .seeks = DEFAULT_SEEKS, + .flags = SHRINKER_NUMA_AWARE, }; #ifdef CONFIG_DEBUG_FS diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 63358d9f9aa9..ea2c4d3e0c03 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) spin_lock_init(&pgdat->numabalancing_migrate_lock); pgdat->numabalancing_migrate_nr_pages = 0; pgdat->numabalancing_migrate_next_window = jiffies; +#endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + spin_lock_init(&pgdat->split_queue_lock); + INIT_LIST_HEAD(&pgdat->split_queue); + pgdat->split_queue_len = 0; #endif init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); -- cgit v1.2.3 From 65376df582174ffcec9e6471bf5b0dd79ba05e4a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 2 Feb 2016 16:57:29 -0800 Subject: proc: revert /proc//maps [stack:TID] annotation Commit b76437579d13 ("procfs: mark thread stack correctly in proc//maps") added [stack:TID] annotation to /proc//maps. Finding the task of a stack VMA requires walking the entire thread list, turning this into quadratic behavior: a thousand threads means a thousand stacks, so the rendering of /proc//maps needs to look at a million combinations. The cost is not in proportion to the usefulness as described in the patch. Drop the [stack:TID] annotation to make /proc//maps (and /proc//numa_maps) usable again for higher thread counts. The [stack] annotation inside /proc//task//maps is retained, as identifying the stack VMA there is an O(1) operation. Siddesh said: "The end users needed a way to identify thread stacks programmatically and there wasn't a way to do that. I'm afraid I no longer remember (or have access to the resources that would aid my memory since I changed employers) the details of their requirement. However, I did do this on my own time because I thought it was an interesting project for me and nobody really gave any feedback then as to its utility, so as far as I am concerned you could roll back the main thread maps information since the information is available in the thread-specific files" Signed-off-by: Johannes Weiner Cc: "Kirill A. Shutemov" Cc: Siddhesh Poyarekar Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 9 ++---- fs/proc/task_mmu.c | 66 +++++++++++++------------------------- fs/proc/task_nommu.c | 49 ++++++++++++---------------- include/linux/mm.h | 3 +- mm/util.c | 27 +--------------- 5 files changed, 48 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fde9fd06fa98..eaebf27539f5 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -356,7 +356,7 @@ address perms offset dev inode pathname a7cb1000-a7cb2000 ---p 00000000 00:00 0 a7cb2000-a7eb2000 rw-p 00000000 00:00 0 a7eb2000-a7eb3000 ---p 00000000 00:00 0 -a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack:1001] +a7eb3000-a7ed5000 rw-p 00000000 00:00 0 a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 @@ -388,7 +388,6 @@ is not associated with a file: [heap] = the heap of the program [stack] = the stack of the main process - [stack:1001] = the stack of the thread with tid 1001 [vdso] = the "virtual dynamic shared object", the kernel system call handler @@ -396,10 +395,8 @@ is not associated with a file: The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint of the individual tasks of a process. In this file you will see a mapping marked -as [stack] if that task sees it as a stack. This is a key difference from the -content of /proc/PID/maps, where you will see all mappings that are being used -as stack by all of those tasks. Hence, for the example above, the task-level -map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: +as [stack] if that task sees it as a stack. Hence, for the example above, the +task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4a0c31f904a6..fa95ab2d3674 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file, sizeof(struct proc_maps_private)); } -static pid_t pid_of_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, bool is_pid) +/* + * Indicate if the VMA is a stack for the given task; for + * /proc/PID/maps that is the stack of the main task. + */ +static int is_stack(struct proc_maps_private *priv, + struct vm_area_struct *vma, int is_pid) { - struct inode *inode = priv->inode; - struct task_struct *task; - pid_t ret = 0; + int stack = 0; + + if (is_pid) { + stack = vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; + } else { + struct inode *inode = priv->inode; + struct task_struct *task; - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) { - task = task_of_stack(task, vma, is_pid); + rcu_read_lock(); + task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) - ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); + stack = vma_is_stack_for_task(vma, task); + rcu_read_unlock(); } - rcu_read_unlock(); - - return ret; + return stack; } static void @@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = arch_vma_name(vma); if (!name) { - pid_t tid; - if (!mm) { name = "[vdso]"; goto done; @@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - tid = pid_of_stack(priv, vma, is_pid); - if (tid != 0) { - /* - * Thread stack in /proc/PID/task/TID/maps or - * the main process stack. - */ - if (!is_pid || (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack)) { - name = "[stack]"; - } else { - /* Thread stack in /proc/PID/maps */ - seq_pad(m, ' '); - seq_printf(m, "[stack:%d]", tid); - } - } + if (is_stack(priv, vma, is_pid)) + name = "[stack]"; } done: @@ -1618,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); - } else { - pid_t tid = pid_of_stack(proc_priv, vma, is_pid); - if (tid != 0) { - /* - * Thread stack in /proc/PID/task/TID/maps or - * the main process stack. - */ - if (!is_pid || (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack)) - seq_puts(m, " stack"); - else - seq_printf(m, " stack:%d", tid); - } + } else if (is_stack(proc_priv, vma, is_pid)) { + seq_puts(m, " stack"); } if (is_vm_hugetlb_page(vma)) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index e0d64c92e4f6..faacb0c0d857 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static pid_t pid_of_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, bool is_pid) +static int is_stack(struct proc_maps_private *priv, + struct vm_area_struct *vma, int is_pid) { - struct inode *inode = priv->inode; - struct task_struct *task; - pid_t ret = 0; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) { - task = task_of_stack(task, vma, is_pid); + struct mm_struct *mm = vma->vm_mm; + int stack = 0; + + if (is_pid) { + stack = vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack; + } else { + struct inode *inode = priv->inode; + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) - ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); + stack = vma_is_stack_for_task(vma, task); + rcu_read_unlock(); } - rcu_read_unlock(); - - return ret; + return stack; } /* @@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); seq_file_path(m, file, ""); - } else if (mm) { - pid_t tid = pid_of_stack(priv, vma, is_pid); - - if (tid != 0) { - seq_pad(m, ' '); - /* - * Thread stack in /proc/PID/task/TID/maps or - * the main process stack. - */ - if (!is_pid || (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack)) - seq_printf(m, "[stack]"); - else - seq_printf(m, "[stack:%d]", tid); - } + } else if (mm && is_stack(priv, vma, is_pid)) { + seq_pad(m, ' '); + seq_printf(m, "[stack]"); } seq_putc(m, '\n'); diff --git a/include/linux/mm.h b/include/linux/mm.h index f1cd22f2df1a..0b50d7848e3a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1341,8 +1341,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma, !vma_growsup(vma->vm_next, addr); } -extern struct task_struct *task_of_stack(struct task_struct *task, - struct vm_area_struct *vma, bool in_group); +int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, diff --git a/mm/util.c b/mm/util.c index c108a6542d05..4fb14ca5a419 100644 --- a/mm/util.c +++ b/mm/util.c @@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, } /* Check if the vma is being used as a stack by this task */ -static int vm_is_stack_for_task(struct task_struct *t, - struct vm_area_struct *vma) +int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t) { return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); } -/* - * Check if the vma is being used as a stack. - * If is_group is non-zero, check in the entire thread group or else - * just check in the current task. Returns the task_struct of the task - * that the vma is stack for. Must be called under rcu_read_lock(). - */ -struct task_struct *task_of_stack(struct task_struct *task, - struct vm_area_struct *vma, bool in_group) -{ - if (vm_is_stack_for_task(task, vma)) - return task; - - if (in_group) { - struct task_struct *t; - - for_each_thread(task, t) { - if (vm_is_stack_for_task(t, vma)) - return t; - } - } - - return NULL; -} - #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) void arch_pick_mmap_layout(struct mm_struct *mm) { -- cgit v1.2.3 From c792e8240338e41eda4d06a3a71a7bb7af4e6156 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 2 Feb 2016 16:57:38 -0800 Subject: mm: memcontrol: drop superfluous entry in the per-memcg stats array MEM_CGROUP_STAT_NSTATS is just a delimiter for cgroup1 statistics, not an actual array entry. Reuse it for the first cgroup2 stat entry, like in the event array. Fixes: b2807f07f4f8 ("mm: memcontrol: add "sock" to cgroup2 memory.stat") Signed-off-by: Johannes Weiner Cc: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9ae48d4aeb5e..792c8981e633 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -51,7 +51,7 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, /* default hierarchy stats */ - MEMCG_SOCK, + MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS, MEMCG_NR_STAT, }; -- cgit v1.2.3 From 30bdbb78009e67767983085e302bec6d97afc679 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 2 Feb 2016 16:57:46 -0800 Subject: mm: polish virtual memory accounting * add VM_STACK as alias for VM_GROWSUP/DOWN depending on architecture * always account VMAs with flag VM_STACK as stack (as it was before) * cleanup classifying helpers * update comments and documentation Signed-off-by: Konstantin Khlebnikov Tested-by: Sudip Mukherjee Cc: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 4 ++-- include/linux/mm.h | 6 ++++-- include/linux/mm_types.h | 6 +++--- mm/internal.h | 23 +++++++++++++++++++---- 4 files changed, 28 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index eaebf27539f5..843b045b4069 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -240,8 +240,8 @@ Table 1-2: Contents of the status files (as of 4.1) RssFile size of resident file mappings RssShmem size of resident shmem memory (includes SysV shm, mapping of tmpfs and shared anonymous mappings) - VmData size of data, stack, and text segments - VmStk size of data, stack, and text segments + VmData size of private data segments + VmStk size of stack segments VmExe size of text segment VmLib size of shared library code VmPTE size of page table entries diff --git a/include/linux/mm.h b/include/linux/mm.h index 0b50d7848e3a..516e14944339 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_STACK_GROWSUP -#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +#define VM_STACK VM_GROWSUP #else -#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +#define VM_STACK VM_GROWSDOWN #endif +#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) + /* * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d3ebb9d21a53..624b78b848b8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -424,9 +424,9 @@ struct mm_struct { unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ unsigned long pinned_vm; /* Refcount permanently increased */ - unsigned long data_vm; /* VM_WRITE & ~VM_SHARED/GROWSDOWN */ - unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ - unsigned long stack_vm; /* VM_GROWSUP/DOWN */ + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ + unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags; unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; diff --git a/mm/internal.h b/mm/internal.h index 6e976302ddd8..a38a21ebddb4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -216,20 +216,35 @@ static inline bool is_cow_mapping(vm_flags_t flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } +/* + * These three helpers classifies VMAs for virtual memory accounting. + */ + +/* + * Executable code area - executable, not writable, not stack + */ static inline bool is_exec_mapping(vm_flags_t flags) { - return (flags & (VM_EXEC | VM_WRITE)) == VM_EXEC; + return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC; } +/* + * Stack area - atomatically grows in one direction + * + * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: + * do_mmap() forbids all other combinations. + */ static inline bool is_stack_mapping(vm_flags_t flags) { - return (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN))) != 0; + return (flags & VM_STACK) == VM_STACK; } +/* + * Data area - private, writable, not stack + */ static inline bool is_data_mapping(vm_flags_t flags) { - return (flags & ((VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)) | - VM_WRITE | VM_SHARED)) == VM_WRITE; + return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; } /* mm/util.c */ -- cgit v1.2.3 From 46437f9a554fbe3e110580ca08ab703b59f2f95a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 2 Feb 2016 16:57:52 -0800 Subject: radix-tree: fix race in gang lookup If the indirect_ptr bit is set on a slot, that indicates we need to redo the lookup. Introduce a new function radix_tree_iter_retry() which forces the loop to retry the lookup by setting 'slot' to NULL and turning the iterator back to point at the problematic entry. This is a pretty rare problem to hit at the moment; the lookup has to race with a grow of the radix tree from a height of 0. The consequences of hitting this race are that gang lookup could return a pointer to a radix_tree_node instead of a pointer to whatever the user had inserted in the tree. Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator") Signed-off-by: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Konstantin Khlebnikov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 16 ++++++++++++++++ lib/radix-tree.c | 12 ++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 7c88ad156a29..00b17c526c1f 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -378,6 +378,22 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags); +/** + * radix_tree_iter_retry - retry this chunk of the iteration + * @iter: iterator state + * + * If we iterate over a tree protected only by the RCU lock, a race + * against deletion or creation may result in seeing a slot for which + * radix_tree_deref_retry() returns true. If so, call this function + * and continue the iteration. + */ +static inline __must_check +void **radix_tree_iter_retry(struct radix_tree_iter *iter) +{ + iter->next_index = iter->index; + return NULL; +} + /** * radix_tree_chunk_size - get current chunk size * diff --git a/lib/radix-tree.c b/lib/radix-tree.c index fcf5d98574ce..6b79e9026e24 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } @@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } -- cgit v1.2.3 From febe562c20dfa8f33bee7d419c6b517986a5aa33 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 11 Jan 2016 21:31:09 -0800 Subject: target: Fix LUN_RESET active I/O handling for ACK_KREF This patch fixes a NULL pointer se_cmd->cmd_kref < 0 refcount bug during TMR LUN_RESET with active se_cmd I/O, that can be triggered during se_cmd descriptor shutdown + release via core_tmr_drain_state_list() code. To address this bug, add common __target_check_io_state() helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE checking, and set CMD_T_ABORTED + obtain ->cmd_kref for both cases ahead of last target_put_sess_cmd() after TFO->aborted_task() -> transport_cmd_finish_abort() callback has completed. It also introduces SCF_ACK_KREF to determine when transport_cmd_finish_abort() needs to drop the second extra reference, ahead of calling target_put_sess_cmd() for the final kref_put(&se_cmd->cmd_kref). It also updates transport_cmd_check_stop() to avoid holding se_cmd->t_state_lock while dropping se_cmd device state via target_remove_from_state_list(), now that core_tmr_drain_state_list() is holding the se_device lock while checking se_cmd state from within TMR logic. Finally, move transport_put_cmd() release of SGL + TMR + extended CDB memory into target_free_cmd_mem() in order to avoid potential resource leaks in TMR ABORT_TASK + LUN_RESET code-paths. Also update target_release_cmd_kref() accordingly. Reviewed-by: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tmr.c | 69 ++++++++++++++++++++++------------ drivers/target/target_core_transport.c | 67 ++++++++++++++------------------- include/target/target_core_base.h | 1 + 3 files changed, 76 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index fcdcb117c60d..fb3decc28589 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -107,6 +107,34 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } +static bool __target_check_io_state(struct se_cmd *se_cmd) +{ + struct se_session *sess = se_cmd->se_sess; + + assert_spin_locked(&sess->sess_cmd_lock); + WARN_ON_ONCE(!irqs_disabled()); + /* + * If command already reached CMD_T_COMPLETE state within + * target_complete_cmd(), this se_cmd has been passed to + * fabric driver and will not be aborted. + * + * Otherwise, obtain a local se_cmd->cmd_kref now for TMR + * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as + * long as se_cmd->cmd_kref is still active unless zero. + */ + spin_lock(&se_cmd->t_state_lock); + if (se_cmd->transport_state & CMD_T_COMPLETE) { + pr_debug("Attempted to abort io tag: %llu already complete," + " skipping\n", se_cmd->tag); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + se_cmd->transport_state |= CMD_T_ABORTED; + spin_unlock(&se_cmd->t_state_lock); + + return kref_get_unless_zero(&se_cmd->cmd_kref); +} + void core_tmr_abort_task( struct se_device *dev, struct se_tmr_req *tmr, @@ -130,34 +158,22 @@ void core_tmr_abort_task( if (tmr->ref_task_tag != ref_tag) continue; - if (!kref_get_unless_zero(&se_cmd->cmd_kref)) - continue; - printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - printk("ABORT_TASK: ref_tag: %llu already complete," - " skipping\n", ref_tag); - spin_unlock(&se_cmd->t_state_lock); + if (!__target_check_io_state(se_cmd)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - target_put_sess_cmd(se_cmd); - goto out; } - se_cmd->transport_state |= CMD_T_ABORTED; - spin_unlock(&se_cmd->t_state_lock); - list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_cmd); transport_cmd_finish_abort(se_cmd, true); + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); @@ -242,8 +258,10 @@ static void core_tmr_drain_state_list( struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_task_list); + struct se_session *sess; struct se_cmd *cmd, *next; unsigned long flags; + int rc; /* * Complete outstanding commands with TASK_ABORTED SAM status. @@ -282,6 +300,16 @@ static void core_tmr_drain_state_list( if (prout_cmd == cmd) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); + rc = __target_check_io_state(cmd); + spin_unlock(&sess->sess_cmd_lock); + if (!rc) + continue; + list_move_tail(&cmd->state_list, &drain_task_list); cmd->state_active = false; } @@ -289,7 +317,7 @@ static void core_tmr_drain_state_list( while (!list_empty(&drain_task_list)) { cmd = list_entry(drain_task_list.next, struct se_cmd, state_list); - list_del(&cmd->state_list); + list_del_init(&cmd->state_list); pr_debug("LUN_RESET: %s cmd: %p" " ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d" @@ -313,16 +341,11 @@ static void core_tmr_drain_state_list( * loop above, but we do it down here given that * cancel_work_sync may block. */ - if (cmd->t_state == TRANSPORT_COMPLETE) - cancel_work_sync(&cmd->work); - - spin_lock_irqsave(&cmd->t_state_lock, flags); - target_stop_cmd(cmd, &flags); - - cmd->transport_state |= CMD_T_ABORTED; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9f3608e10f25..af52f8bd8954 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -534,9 +534,6 @@ void transport_deregister_session(struct se_session *se_sess) } EXPORT_SYMBOL(transport_deregister_session); -/* - * Called with cmd->t_state_lock held. - */ static void target_remove_from_state_list(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; @@ -561,10 +558,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, { unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (write_pending) - cmd->t_state = TRANSPORT_WRITE_PENDING; - if (remove_from_lists) { target_remove_from_state_list(cmd); @@ -574,6 +567,10 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, cmd->se_lun = NULL; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (write_pending) + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. @@ -627,6 +624,8 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { + bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); /* @@ -638,7 +637,7 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) if (transport_cmd_check_stop_to_fabric(cmd)) return; - if (remove) + if (remove && ack_kref) transport_put_cmd(cmd); } @@ -706,7 +705,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. */ - if (cmd->transport_state & CMD_T_ABORTED && + if (cmd->transport_state & CMD_T_ABORTED || cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); complete_all(&cmd->t_transport_stop_comp); @@ -2222,20 +2221,14 @@ static inline void transport_free_pages(struct se_cmd *cmd) } /** - * transport_release_cmd - free a command - * @cmd: command to free + * transport_put_cmd - release a reference to a command + * @cmd: command to release * - * This routine unconditionally frees a command, and reference counting - * or list removal must be done in the caller. + * This routine releases our reference to the command and frees it if possible. */ -static int transport_release_cmd(struct se_cmd *cmd) +static int transport_put_cmd(struct se_cmd *cmd) { BUG_ON(!cmd->se_tfo); - - if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) - core_tmr_release_req(cmd->se_tmr_req); - if (cmd->t_task_cdb != cmd->__t_task_cdb) - kfree(cmd->t_task_cdb); /* * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. @@ -2243,18 +2236,6 @@ static int transport_release_cmd(struct se_cmd *cmd) return target_put_sess_cmd(cmd); } -/** - * transport_put_cmd - release a reference to a command - * @cmd: command to release - * - * This routine releases our reference to the command and frees it if possible. - */ -static int transport_put_cmd(struct se_cmd *cmd) -{ - transport_free_pages(cmd); - return transport_release_cmd(cmd); -} - void *transport_kmap_data_sg(struct se_cmd *cmd) { struct scatterlist *sg = cmd->t_data_sg; @@ -2452,14 +2433,13 @@ static void transport_write_pending_qf(struct se_cmd *cmd) int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { - unsigned long flags; int ret = 0; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + transport_wait_for_tasks(cmd); - ret = transport_release_cmd(cmd); + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) transport_wait_for_tasks(cmd); @@ -2468,11 +2448,8 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) * has already added se_cmd to state_list, but fabric has * failed command before I/O submission. */ - if (cmd->state_active) { - spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->state_active) target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - } if (cmd->se_lun) transport_lun_remove_cmd(cmd); @@ -2517,6 +2494,16 @@ out: } EXPORT_SYMBOL(target_get_sess_cmd); +static void target_free_cmd_mem(struct se_cmd *cmd) +{ + transport_free_pages(cmd); + + if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) + core_tmr_release_req(cmd->se_tmr_req); + if (cmd->t_task_cdb != cmd->__t_task_cdb) + kfree(cmd->t_task_cdb); +} + static void target_release_cmd_kref(struct kref *kref) { struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); @@ -2526,17 +2513,20 @@ static void target_release_cmd_kref(struct kref *kref) spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (list_empty(&se_cmd->se_cmd_list)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return; } if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } list_del(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); } @@ -2548,6 +2538,7 @@ int target_put_sess_cmd(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; if (!se_sess) { + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return 1; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 5d82816cc4e3..1a76726c45a2 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -140,6 +140,7 @@ enum se_cmd_flags_table { SCF_COMPARE_AND_WRITE = 0x00080000, SCF_COMPARE_AND_WRITE_POST = 0x00100000, SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000, + SCF_ACK_KREF = 0x00400000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ -- cgit v1.2.3 From fac710e45d0b12443acd4d905c9acec27ab4ca14 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 27 Jan 2016 10:08:42 -0200 Subject: [media] vb2: fix nasty vb2_thread regression The vb2_thread implementation was made generic and was moved from videobuf2-v4l2.c to videobuf2-core.c in commit af3bac1a. Unfortunately that clearly was never tested since it broke read() causing NULL address references. The root cause was confused handling of vb2_buffer vs v4l2_buffer (the pb pointer in various core functions). The v4l2_buffer no longer exists after moving the code into the core and it is no longer needed. However, the vb2_thread code passed a pointer to a vb2_buffer to the core functions were a v4l2_buffer pointer was expected and vb2_thread expected that the vb2_buffer fields would be filled in correctly. This is obviously wrong since v4l2_buffer != vb2_buffer. Note that the pb pointer is a void pointer, so no type-checking took place. This patch fixes this problem: 1) allow pb to be NULL for vb2_core_(d)qbuf. The vb2_thread code will use a NULL pointer here since they don't care about v4l2_buffer anyway. 2) let vb2_core_dqbuf pass back the index of the received buffer. This is all vb2_thread needs: this index is the index into the q->bufs array and vb2_thread just gets the vb2_buffer from there. 3) the fileio->b pointer (that originally contained a v4l2_buffer) is removed altogether since it is no longer needed. Tested with vivid and the cobalt driver. Cc: stable@vger.kernel.org # Kernel >= 4.3 Signed-off-by: Hans Verkuil Reported-by: Matthias Schwarzott Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf2-core.c | 95 +++++++++++++++----------------- drivers/media/v4l2-core/videobuf2-v4l2.c | 2 +- include/media/videobuf2-core.h | 3 +- 3 files changed, 46 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index c5d49d7a0d76..b53e42c329cb 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1063,8 +1063,11 @@ EXPORT_SYMBOL_GPL(vb2_discard_done); */ static int __qbuf_mmap(struct vb2_buffer *vb, const void *pb) { - int ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, - vb, pb, vb->planes); + int ret = 0; + + if (pb) + ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, + vb, pb, vb->planes); return ret ? ret : call_vb_qop(vb, buf_prepare, vb); } @@ -1077,14 +1080,16 @@ static int __qbuf_userptr(struct vb2_buffer *vb, const void *pb) struct vb2_queue *q = vb->vb2_queue; void *mem_priv; unsigned int plane; - int ret; + int ret = 0; enum dma_data_direction dma_dir = q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE; bool reacquired = vb->planes[0].mem_priv == NULL; memset(planes, 0, sizeof(planes[0]) * vb->num_planes); /* Copy relevant information provided by the userspace */ - ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes); + if (pb) + ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, + vb, pb, planes); if (ret) return ret; @@ -1192,14 +1197,16 @@ static int __qbuf_dmabuf(struct vb2_buffer *vb, const void *pb) struct vb2_queue *q = vb->vb2_queue; void *mem_priv; unsigned int plane; - int ret; + int ret = 0; enum dma_data_direction dma_dir = q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE; bool reacquired = vb->planes[0].mem_priv == NULL; memset(planes, 0, sizeof(planes[0]) * vb->num_planes); /* Copy relevant information provided by the userspace */ - ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, pb, planes); + if (pb) + ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, + vb, pb, planes); if (ret) return ret; @@ -1520,7 +1527,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb) q->waiting_for_buffers = false; vb->state = VB2_BUF_STATE_QUEUED; - call_void_bufop(q, copy_timestamp, vb, pb); + if (pb) + call_void_bufop(q, copy_timestamp, vb, pb); trace_vb2_qbuf(q, vb); @@ -1532,7 +1540,8 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb) __enqueue_in_driver(vb); /* Fill buffer information for the userspace */ - call_void_bufop(q, fill_user_buffer, vb, pb); + if (pb) + call_void_bufop(q, fill_user_buffer, vb, pb); /* * If streamon has been called, and we haven't yet called @@ -1731,7 +1740,8 @@ static void __vb2_dqbuf(struct vb2_buffer *vb) * The return values from this function are intended to be directly returned * from vidioc_dqbuf handler in driver. */ -int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking) +int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb, + bool nonblocking) { struct vb2_buffer *vb = NULL; int ret; @@ -1754,8 +1764,12 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking) call_void_vb_qop(vb, buf_finish, vb); + if (pindex) + *pindex = vb->index; + /* Fill buffer information for the userspace */ - call_void_bufop(q, fill_user_buffer, vb, pb); + if (pb) + call_void_bufop(q, fill_user_buffer, vb, pb); /* Remove from videobuf queue */ list_del(&vb->queued_entry); @@ -1828,7 +1842,7 @@ static void __vb2_queue_cancel(struct vb2_queue *q) * that's done in dqbuf, but that's not going to happen when we * cancel the whole queue. Note: this code belongs here, not in * __vb2_dqbuf() since in vb2_internal_dqbuf() there is a critical - * call to __fill_v4l2_buffer() after buf_finish(). That order can't + * call to __fill_user_buffer() after buf_finish(). That order can't * be changed, so we can't move the buf_finish() to __vb2_dqbuf(). */ for (i = 0; i < q->num_buffers; ++i) { @@ -2357,7 +2371,6 @@ struct vb2_fileio_data { unsigned int count; unsigned int type; unsigned int memory; - struct vb2_buffer *b; struct vb2_fileio_buf bufs[VB2_MAX_FRAME]; unsigned int cur_index; unsigned int initial_index; @@ -2410,12 +2423,6 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read) if (fileio == NULL) return -ENOMEM; - fileio->b = kzalloc(q->buf_struct_size, GFP_KERNEL); - if (fileio->b == NULL) { - kfree(fileio); - return -ENOMEM; - } - fileio->read_once = q->fileio_read_once; fileio->write_immediately = q->fileio_write_immediately; @@ -2460,13 +2467,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read) * Queue all buffers. */ for (i = 0; i < q->num_buffers; i++) { - struct vb2_buffer *b = fileio->b; - - memset(b, 0, q->buf_struct_size); - b->type = q->type; - b->memory = q->memory; - b->index = i; - ret = vb2_core_qbuf(q, i, b); + ret = vb2_core_qbuf(q, i, NULL); if (ret) goto err_reqbufs; fileio->bufs[i].queued = 1; @@ -2511,7 +2512,6 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q) q->fileio = NULL; fileio->count = 0; vb2_core_reqbufs(q, fileio->memory, &fileio->count); - kfree(fileio->b); kfree(fileio); dprintk(3, "file io emulator closed\n"); } @@ -2539,7 +2539,8 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_ * else is able to provide this information with the write() operation. */ bool copy_timestamp = !read && q->copy_timestamp; - int ret, index; + unsigned index; + int ret; dprintk(3, "mode %s, offset %ld, count %zd, %sblocking\n", read ? "read" : "write", (long)*ppos, count, @@ -2564,22 +2565,20 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_ */ index = fileio->cur_index; if (index >= q->num_buffers) { - struct vb2_buffer *b = fileio->b; + struct vb2_buffer *b; /* * Call vb2_dqbuf to get buffer back. */ - memset(b, 0, q->buf_struct_size); - b->type = q->type; - b->memory = q->memory; - ret = vb2_core_dqbuf(q, b, nonblock); + ret = vb2_core_dqbuf(q, &index, NULL, nonblock); dprintk(5, "vb2_dqbuf result: %d\n", ret); if (ret) return ret; fileio->dq_count += 1; - fileio->cur_index = index = b->index; + fileio->cur_index = index; buf = &fileio->bufs[index]; + b = q->bufs[index]; /* * Get number of bytes filled by the driver @@ -2630,7 +2629,7 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_ * Queue next buffer if required. */ if (buf->pos == buf->size || (!read && fileio->write_immediately)) { - struct vb2_buffer *b = fileio->b; + struct vb2_buffer *b = q->bufs[index]; /* * Check if this is the last buffer to read. @@ -2643,15 +2642,11 @@ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_ /* * Call vb2_qbuf and give buffer to the driver. */ - memset(b, 0, q->buf_struct_size); - b->type = q->type; - b->memory = q->memory; - b->index = index; b->planes[0].bytesused = buf->pos; if (copy_timestamp) b->timestamp = ktime_get_ns(); - ret = vb2_core_qbuf(q, index, b); + ret = vb2_core_qbuf(q, index, NULL); dprintk(5, "vb2_dbuf result: %d\n", ret); if (ret) return ret; @@ -2713,10 +2708,9 @@ static int vb2_thread(void *data) { struct vb2_queue *q = data; struct vb2_threadio_data *threadio = q->threadio; - struct vb2_fileio_data *fileio = q->fileio; bool copy_timestamp = false; - int prequeue = 0; - int index = 0; + unsigned prequeue = 0; + unsigned index = 0; int ret = 0; if (q->is_output) { @@ -2728,37 +2722,34 @@ static int vb2_thread(void *data) for (;;) { struct vb2_buffer *vb; - struct vb2_buffer *b = fileio->b; /* * Call vb2_dqbuf to get buffer back. */ - memset(b, 0, q->buf_struct_size); - b->type = q->type; - b->memory = q->memory; if (prequeue) { - b->index = index++; + vb = q->bufs[index++]; prequeue--; } else { call_void_qop(q, wait_finish, q); if (!threadio->stop) - ret = vb2_core_dqbuf(q, b, 0); + ret = vb2_core_dqbuf(q, &index, NULL, 0); call_void_qop(q, wait_prepare, q); dprintk(5, "file io: vb2_dqbuf result: %d\n", ret); + if (!ret) + vb = q->bufs[index]; } if (ret || threadio->stop) break; try_to_freeze(); - vb = q->bufs[b->index]; - if (b->state == VB2_BUF_STATE_DONE) + if (vb->state == VB2_BUF_STATE_DONE) if (threadio->fnc(vb, threadio->priv)) break; call_void_qop(q, wait_finish, q); if (copy_timestamp) - b->timestamp = ktime_get_ns();; + vb->timestamp = ktime_get_ns();; if (!threadio->stop) - ret = vb2_core_qbuf(q, b->index, b); + ret = vb2_core_qbuf(q, vb->index, NULL); call_void_qop(q, wait_prepare, q); if (ret || threadio->stop) break; diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index c9a28605511a..91f552124050 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -625,7 +625,7 @@ static int vb2_internal_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b, return -EINVAL; } - ret = vb2_core_dqbuf(q, b, nonblocking); + ret = vb2_core_dqbuf(q, NULL, b, nonblocking); return ret; } diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index ef03ae56b1c1..8a0f55b6c2ba 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -533,7 +533,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, const unsigned int requested_sizes[]); int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb); int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb); -int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking); +int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb, + bool nonblocking); int vb2_core_streamon(struct vb2_queue *q, unsigned int type); int vb2_core_streamoff(struct vb2_queue *q, unsigned int type); -- cgit v1.2.3 From bf64318564c43385ffc3d3dfedab5287bdf3dfdd Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Thu, 4 Feb 2016 15:13:27 +0100 Subject: lightnvm: allow to force mm initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit System block allows the device to initialize with its configured media manager. The system blocks is written to disk, and read again when media manager is determined. For this to work, the backend must store the data. Device drivers, such as null_blk, does not have any backend storage. This patch allows the media manager to be initialized without a storage backend. It also fix incorrect configuration of capabilities in null_blk, as it does not support get/set bad block interface. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 2 +- drivers/lightnvm/core.c | 25 ++++++++++++++++--------- include/linux/lightnvm.h | 4 ++++ 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 8ba1e97d573c..ae05d31c0559 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -478,7 +478,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) id->ver_id = 0x1; id->vmnt = 0; id->cgrps = 1; - id->cap = 0x3; + id->cap = 0x2; id->dom = 0x1; id->ppaf.blk_offset = 0; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 33224cb91c5b..9f6acd5d1d2e 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -572,11 +572,13 @@ int nvm_register(struct request_queue *q, char *disk_name, } } - ret = nvm_get_sysblock(dev, &dev->sb); - if (!ret) - pr_err("nvm: device not initialized.\n"); - else if (ret < 0) - pr_err("nvm: err (%d) on device initialization\n", ret); + if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { + ret = nvm_get_sysblock(dev, &dev->sb); + if (!ret) + pr_err("nvm: device not initialized.\n"); + else if (ret < 0) + pr_err("nvm: err (%d) on device initialization\n", ret); + } /* register device with a supported media manager */ down_write(&nvm_lock); @@ -1055,9 +1057,11 @@ static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init) strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN); info.fs_ppa.ppa = -1; - ret = nvm_init_sysblock(dev, &info); - if (ret) - return ret; + if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { + ret = nvm_init_sysblock(dev, &info); + if (ret) + return ret; + } memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info)); @@ -1117,7 +1121,10 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) dev->mt = NULL; } - return nvm_dev_factory(dev, fact.flags); + if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) + return nvm_dev_factory(dev, fact.flags); + + return 0; } static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index d6750111e48e..2190419bdf0a 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -135,6 +135,10 @@ enum { /* Memory types */ NVM_ID_FMTYPE_SLC = 0, NVM_ID_FMTYPE_MLC = 1, + + /* Device capabilities */ + NVM_ID_DCAP_BBLKMGMT = 0x1, + NVM_UD_DCAP_ECC = 0x2, }; struct nvm_id_lp_mlc { -- cgit v1.2.3 From dc6ae6d8e7726bad4f1c87244b49cac851746c65 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 31 Jan 2016 14:36:07 +0100 Subject: crush: add chooseleaf_stable tunable Add a tunable to fix the bug that chooseleaf may cause unnecessary pg migrations when some device fails. Reflects ceph.git commit fdb3f664448e80d984470f32f04e2e6f03ab52ec. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/crush/crush.h | 8 +++++++- net/ceph/crush/mapper.c | 18 ++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48b49305716b..be8f12b8f195 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -59,7 +59,8 @@ enum { CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11, - CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12 + CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12, + CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13 }; /* @@ -205,6 +206,11 @@ struct crush_map { * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + /* if true, it makes chooseleaf firstn to return stable results (if + * no local retry) so that data migrations would be optimal when some + * device fails. */ + __u8 chooseleaf_stable; + #ifndef __KERNEL__ /* * version 0 (original) of straw_calc has various flaws. version 1 diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index abb700621e4a..5fcfb98f309e 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map, * @local_retries: localized retries * @local_fallback_retries: localized fallback retries * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) + * @stable: stable mode starts rep=0 in the recursive call for all replicas * @vary_r: pass r to recursive calls * @out2: second output vector for leaf items (if @recurse_to_leaf) * @parent_r: r value passed from the parent @@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map, unsigned int local_fallback_retries, int recurse_to_leaf, unsigned int vary_r, + unsigned int stable, int *out2, int parent_r) { @@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map, int collide, reject; int count = out_size; - dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", + dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n", recurse_to_leaf ? "_LEAF" : "", bucket->id, x, outpos, numrep, tries, recurse_tries, local_retries, local_fallback_retries, - parent_r); + parent_r, stable); - for (rep = outpos; rep < numrep && count > 0 ; rep++) { + for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) { /* keep trying until we get a non-out, non-colliding item */ ftotal = 0; skip_rep = 0; @@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map, if (crush_choose_firstn(map, map->buckets[-1-item], weight, weight_max, - x, outpos+1, 0, + x, stable ? 1 : outpos+1, 0, out2, outpos, count, recurse_tries, 0, local_retries, local_fallback_retries, 0, vary_r, + stable, NULL, sub_r) <= outpos) /* didn't get leaf */ @@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map, int choose_local_fallback_retries = map->choose_local_fallback_tries; int vary_r = map->chooseleaf_vary_r; + int stable = map->chooseleaf_stable; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -870,6 +874,11 @@ int crush_do_rule(const struct crush_map *map, vary_r = curstep->arg1; break; + case CRUSH_RULE_SET_CHOOSELEAF_STABLE: + if (curstep->arg1 >= 0) + stable = curstep->arg1; + break; + case CRUSH_RULE_CHOOSELEAF_FIRSTN: case CRUSH_RULE_CHOOSE_FIRSTN: firstn = 1; @@ -932,6 +941,7 @@ int crush_do_rule(const struct crush_map *map, choose_local_fallback_retries, recurse_to_leaf, vary_r, + stable, c+osize, 0); } else { -- cgit v1.2.3 From 97db9a88186e3a7d3a1942370c836bf221d3ab90 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 1 Feb 2016 17:08:33 +0100 Subject: libceph: advertise support for TUNABLES5 Add TUNABLES5 feature (chooseleaf_stable tunable) to a set of features supported by default. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index f89b31d45cc8..c3b211c9fe83 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -63,6 +63,16 @@ #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49) // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */ +#define CEPH_FEATURE_MON_METADATA (1ULL<<50) +#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */ +#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52) +#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53) +#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54) +#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55) +#define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */ +#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */ +#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */ +#define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -108,7 +118,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ - CEPH_FEATURE_CRUSH_V4) + CEPH_FEATURE_CRUSH_V4 | \ + CEPH_FEATURE_CRUSH_TUNABLES5) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3 From b0b31a8ffe54abf0a455bcaee54dd92f08817164 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 3 Feb 2016 15:25:48 +0100 Subject: libceph: MOSDOpReply v7 encoding Empty request_redirect_t (struct ceph_request_redirect in the kernel client) is now encoded with a bool. NEW_OSDOPREPLY_ENCODING feature bit overlaps with already supported CRUSH_TUNABLES5. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 5 ++++- net/ceph/osd_client.c | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index c3b211c9fe83..c1ef6f14e7be 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -73,6 +73,8 @@ #define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */ #define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */ #define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */ +// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5 +#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -119,7 +121,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_CRUSH_V4 | \ - CEPH_FEATURE_CRUSH_TUNABLES5) + CEPH_FEATURE_CRUSH_TUNABLES5 | \ + CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f8f235930d88..3534e12683d3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1770,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) u32 osdmap_epoch; int already_completed; u32 bytes; + u8 decode_redir; unsigned int i; tid = le64_to_cpu(msg->hdr.tid); @@ -1841,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) p += 8 + 4; /* skip replay_version */ p += 8; /* skip user_version */ + if (le16_to_cpu(msg->hdr.version) >= 7) + ceph_decode_8_safe(&p, end, decode_redir, bad_put); + else + decode_redir = 1; + } else { + decode_redir = 0; + } + + if (decode_redir) { err = ceph_redirect_decode(&p, end, &redir); if (err) goto bad_put; -- cgit v1.2.3 From 0fb5b1fb30fba3671dd5b1489d78e93e08d62e4e Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 4 Feb 2016 00:52:12 -0500 Subject: block/sd: Return -EREMOTEIO when WRITE SAME and DISCARD are disabled When a storage device rejects a WRITE SAME command we will disable write same functionality for the device and return -EREMOTEIO to the block layer. -EREMOTEIO will in turn prevent DM from retrying the I/O and/or failing the path. Yiwen Jiang discovered a small race where WRITE SAME requests issued simultaneously would cause -EIO to be returned. This happened because any requests being prepared after WRITE SAME had been disabled for the device caused us to return BLKPREP_KILL. The latter caused the block layer to return -EIO upon completion. To overcome this we introduce BLKPREP_INVALID which indicates that this is an invalid request for the device. blk_peek_request() is modified to return -EREMOTEIO in that case. Reported-by: Yiwen Jiang Suggested-by: Mike Snitzer Reviewed-by: Hannes Reinicke Reviewed-by: Ewan Milne Reviewed-by: Yiwen Jiang Signed-off-by: Martin K. Petersen --- block/blk-core.c | 6 ++++-- drivers/scsi/sd.c | 4 ++-- include/linux/blkdev.h | 9 ++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 476244d59309..35607dd0223b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2447,14 +2447,16 @@ struct request *blk_peek_request(struct request_queue *q) rq = NULL; break; - } else if (ret == BLKPREP_KILL) { + } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { + int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO; + rq->cmd_flags |= REQ_QUIET; /* * Mark this request as started so we don't trigger * any debug logic in the end I/O path. */ blk_start_request(rq); - __blk_end_request_all(rq, -EIO); + __blk_end_request_all(rq, err); } else { printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); break; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ec163d08f6c3..6e841c6da632 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -761,7 +761,7 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) break; default: - ret = BLKPREP_KILL; + ret = BLKPREP_INVALID; goto out; } @@ -839,7 +839,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) int ret; if (sdkp->device->no_write_same) - return BLKPREP_KILL; + return BLKPREP_INVALID; BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d372ea87ead5..a9b643a0647f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -681,9 +681,12 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) /* * q->prep_rq_fn return values */ -#define BLKPREP_OK 0 /* serve it */ -#define BLKPREP_KILL 1 /* fatal error, kill */ -#define BLKPREP_DEFER 2 /* leave on queue */ +enum { + BLKPREP_OK, /* serve it */ + BLKPREP_KILL, /* fatal error, kill, return -EIO */ + BLKPREP_DEFER, /* leave on queue */ + BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */ +}; extern unsigned long blk_max_low_pfn, blk_max_pfn; -- cgit v1.2.3 From 64566b5e767f9bc3161055ca1b443a51afb52aad Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 22 Jan 2016 17:07:25 -0500 Subject: drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil drm_fixp_from_fraction allows us to create a fixed point directly from a fraction, rather than creating fixed point values and dividing later. This avoids overflow of our 64 bit value for large numbers. drm_fixp2int_ceil allows us to return the ceiling of our fixed point value. [airlied: squash Jordan's fix] 32-bit-build-fix: Jordan Lazare Signed-off-by: Harry Wentland Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- include/drm/drm_fixed.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index d639049a613d..553210c02ee0 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) +#define DRM_FIXED_EPSILON 1LL +#define DRM_FIXED_ALMOST_ONE (DRM_FIXED_ONE - DRM_FIXED_EPSILON) static inline s64 drm_int2fixp(int a) { return ((s64)a) << DRM_FIXED_POINT; } -static inline int drm_fixp2int(int64_t a) +static inline int drm_fixp2int(s64 a) { return ((s64)a) >> DRM_FIXED_POINT; } -static inline unsigned drm_fixp_msbset(int64_t a) +static inline int drm_fixp2int_ceil(s64 a) +{ + if (a > 0) + return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); + else + return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); +} + +static inline unsigned drm_fixp_msbset(s64 a) { unsigned shift, sign = (a >> 63) & 1; @@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b) return result; } +static inline s64 drm_fixp_from_fraction(s64 a, s64 b) +{ + s64 res; + bool a_neg = a < 0; + bool b_neg = b < 0; + u64 a_abs = a_neg ? -a : a; + u64 b_abs = b_neg ? -b : b; + u64 rem; + + /* determine integer part */ + u64 res_abs = div64_u64_rem(a_abs, b_abs, &rem); + + /* determine fractional part */ + { + u32 i = DRM_FIXED_POINT; + + do { + rem <<= 1; + res_abs <<= 1; + if (rem >= b_abs) { + res_abs |= 1; + rem -= b_abs; + } + } while (--i != 0); + } + + /* round up LSB */ + { + u64 summand = (rem << 1) >= b_abs; + + res_abs += summand; + } + + res = (s64) res_abs; + if (a_neg ^ b_neg) + res = -res; + return res; +} + static inline s64 drm_fixp_exp(s64 x) { s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000); -- cgit v1.2.3 From 5e93b8208d3c419b515fb75e2601931c027e12ab Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Fri, 22 Jan 2016 17:07:28 -0500 Subject: drm/dp/mst: move GUID storage from mgr, port to only mst branch Previous implementation does not handle case below: boot up one MST branch to DP connector of ASIC. After boot up, hot plug 2nd MST branch to DP output of 1st MST, GUID is not created for 2nd MST branch. When downstream port of 2nd MST branch send upstream request, it fails because 2nd MST branch GUID is not available. New Implementation: only create GUID for MST branch and save it within Branch. Signed-off-by: Hersen Wu Reviewed-by: Harry Wentland Cc: stable@vger.kernel.org Acked-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 64 +++++++++++++++-------------------- include/drm/drm_dp_mst_helper.h | 25 ++++++-------- 2 files changed, 38 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 630e6eb55fa3..2c12a20ff08e 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1018,18 +1018,27 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port) return send_link; } -static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb, - struct drm_dp_mst_port *port) +static void drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid) { int ret; - if (port->dpcd_rev >= 0x12) { - port->guid_valid = drm_dp_validate_guid(mstb->mgr, port->guid); - if (!port->guid_valid) { - ret = drm_dp_send_dpcd_write(mstb->mgr, - port, - DP_GUID, - 16, port->guid); - port->guid_valid = true; + + memcpy(mstb->guid, guid, 16); + + if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) { + if (mstb->port_parent) { + ret = drm_dp_send_dpcd_write( + mstb->mgr, + mstb->port_parent, + DP_GUID, + 16, + mstb->guid); + } else { + + ret = drm_dp_dpcd_write( + mstb->mgr->aux, + DP_GUID, + mstb->guid, + 16); } } } @@ -1086,7 +1095,6 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, port->dpcd_rev = port_msg->dpcd_revision; port->num_sdp_streams = port_msg->num_sdp_streams; port->num_sdp_stream_sinks = port_msg->num_sdp_stream_sinks; - memcpy(port->guid, port_msg->peer_guid, 16); /* manage mstb port lists with mgr lock - take a reference for this list */ @@ -1099,11 +1107,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, if (old_ddps != port->ddps) { if (port->ddps) { - drm_dp_check_port_guid(mstb, port); if (!port->input) drm_dp_send_enum_path_resources(mstb->mgr, mstb, port); } else { - port->guid_valid = false; port->available_pbn = 0; } } @@ -1161,9 +1167,7 @@ static void drm_dp_update_port(struct drm_dp_mst_branch *mstb, if (old_ddps != port->ddps) { dowork = true; if (port->ddps) { - drm_dp_check_port_guid(mstb, port); } else { - port->guid_valid = false; port->available_pbn = 0; } } @@ -1220,13 +1224,14 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( struct drm_dp_mst_branch *found_mstb; struct drm_dp_mst_port *port; + if (memcmp(mstb->guid, guid, 16) == 0) + return mstb; + + list_for_each_entry(port, &mstb->ports, next) { if (!port->mstb) continue; - if (port->guid_valid && memcmp(port->guid, guid, 16) == 0) - return port->mstb; - found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); if (found_mstb) @@ -1245,10 +1250,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid( /* find the port by iterating down */ mutex_lock(&mgr->lock); - if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0) - mstb = mgr->mst_primary; - else - mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); + mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); if (mstb) kref_get(&mstb->kref); @@ -1566,6 +1568,9 @@ static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, txmsg->reply.u.link_addr.ports[i].num_sdp_streams, txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks); } + + drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid); + for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) { drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]); } @@ -2006,20 +2011,6 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms goto out_unlock; } - - /* sort out guid */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, mgr->guid, 16); - if (ret != 16) { - DRM_DEBUG_KMS("failed to read DP GUID %d\n", ret); - goto out_unlock; - } - - mgr->guid_valid = drm_dp_validate_guid(mgr, mgr->guid); - if (!mgr->guid_valid) { - ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, mgr->guid, 16); - mgr->guid_valid = true; - } - queue_work(system_long_wq, &mgr->work); ret = 0; @@ -2241,6 +2232,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) } drm_dp_update_port(mstb, &msg.u.conn_stat); + DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type); } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 24ab1787b771..fdb47051d549 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -44,8 +44,6 @@ struct drm_dp_vcpi { /** * struct drm_dp_mst_port - MST port * @kref: reference count for this port. - * @guid_valid: for DP 1.2 devices if we have validated the GUID. - * @guid: guid for DP 1.2 device on this port. * @port_num: port number * @input: if this port is an input port. * @mcs: message capability status - DP 1.2 spec. @@ -70,10 +68,6 @@ struct drm_dp_vcpi { struct drm_dp_mst_port { struct kref kref; - /* if dpcd 1.2 device is on this port - its GUID info */ - bool guid_valid; - u8 guid[16]; - u8 port_num; bool input; bool mcs; @@ -110,10 +104,12 @@ struct drm_dp_mst_port { * @tx_slots: transmission slots for this device. * @last_seqno: last sequence number used to talk to this. * @link_address_sent: if a link address message has been sent to this device yet. + * @guid: guid for DP 1.2 branch device. port under this branch can be + * identified by port #. * * This structure represents an MST branch device, there is one - * primary branch device at the root, along with any others connected - * to downstream ports + * primary branch device at the root, along with any other branches connected + * to downstream port of parent branches. */ struct drm_dp_mst_branch { struct kref kref; @@ -132,6 +128,9 @@ struct drm_dp_mst_branch { struct drm_dp_sideband_msg_tx *tx_slots[2]; int last_seqno; bool link_address_sent; + + /* global unique identifier to identify branch devices */ + u8 guid[16]; }; @@ -406,11 +405,9 @@ struct drm_dp_payload { * @conn_base_id: DRM connector ID this mgr is connected to. * @down_rep_recv: msg receiver state for down replies. * @up_req_recv: msg receiver state for up requests. - * @lock: protects mst state, primary, guid, dpcd. + * @lock: protects mst state, primary, dpcd. * @mst_state: if this manager is enabled for an MST capable port. * @mst_primary: pointer to the primary branch device. - * @guid_valid: GUID valid for the primary branch device. - * @guid: GUID for primary port. * @dpcd: cache of DPCD for primary port. * @pbn_div: PBN to slots divisor. * @@ -432,13 +429,11 @@ struct drm_dp_mst_topology_mgr { struct drm_dp_sideband_msg_rx up_req_recv; /* pointer to info about the initial MST device */ - struct mutex lock; /* protects mst_state + primary + guid + dpcd */ + struct mutex lock; /* protects mst_state + primary + dpcd */ bool mst_state; struct drm_dp_mst_branch *mst_primary; - /* primary MST device GUID */ - bool guid_valid; - u8 guid[16]; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; u8 sink_count; int pbn_div; -- cgit v1.2.3 From 0f4a943168f31d29a1701908931acaba518b131a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 19 Jan 2016 15:23:02 -0800 Subject: target: Fix remote-port TMR ABORT + se_cmd fabric stop To address the bug where fabric driver level shutdown of se_cmd occurs at the same time when TMR CMD_T_ABORTED is happening resulting in a -1 ->cmd_kref, this patch adds a CMD_T_FABRIC_STOP bit that is used to determine when TMR + driver I_T nexus shutdown is happening concurrently. It changes target_sess_cmd_list_set_waiting() to obtain se_cmd->cmd_kref + set CMD_T_FABRIC_STOP, and drop local reference in target_wait_for_sess_cmds() and invoke extra target_put_sess_cmd() during Task Aborted Status (TAS) when necessary. Also, it adds a new target_wait_free_cmd() wrapper around transport_wait_for_tasks() for the special case within transport_generic_free_cmd() to set CMD_T_FABRIC_STOP, and is now aware of CMD_T_ABORTED + CMD_T_TAS status bits to know when an extra transport_put_cmd() during TAS is required. Note transport_generic_free_cmd() is expected to block on cmd->cmd_wait_comp in order to follow what iscsi-target expects during iscsi_conn context se_cmd shutdown. Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tmr.c | 54 ++++++++---- drivers/target/target_core_transport.c | 145 +++++++++++++++++++++++++-------- include/target/target_core_base.h | 2 + 3 files changed, 150 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 3e0d77a4c7b6..82a663ba9800 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -75,16 +75,18 @@ void core_tmr_release_req(struct se_tmr_req *tmr) kfree(tmr); } -static void core_tmr_handle_tas_abort( - struct se_session *tmr_sess, - struct se_cmd *cmd, - int tas) +static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { - bool remove = true; + unsigned long flags; + bool remove = true, send_tas; /* * TASK ABORTED status (TAS) bit support */ - if (tmr_sess && tmr_sess != cmd->se_sess && tas) { + spin_lock_irqsave(&cmd->t_state_lock, flags); + send_tas = (cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + + if (send_tas) { remove = false; transport_send_task_abort(cmd); } @@ -107,7 +109,8 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } -static bool __target_check_io_state(struct se_cmd *se_cmd) +static bool __target_check_io_state(struct se_cmd *se_cmd, + struct se_session *tmr_sess, int tas) { struct se_session *sess = se_cmd->se_sess; @@ -115,21 +118,32 @@ static bool __target_check_io_state(struct se_cmd *se_cmd) WARN_ON_ONCE(!irqs_disabled()); /* * If command already reached CMD_T_COMPLETE state within - * target_complete_cmd(), this se_cmd has been passed to - * fabric driver and will not be aborted. + * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, + * this se_cmd has been passed to fabric driver and will + * not be aborted. * * Otherwise, obtain a local se_cmd->cmd_kref now for TMR * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as * long as se_cmd->cmd_kref is still active unless zero. */ spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - pr_debug("Attempted to abort io tag: %llu already complete," + if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) { + pr_debug("Attempted to abort io tag: %llu already complete or" + " fabric stop, skipping\n", se_cmd->tag); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { + pr_debug("Attempted to abort io tag: %llu already shutdown," " skipping\n", se_cmd->tag); spin_unlock(&se_cmd->t_state_lock); return false; } se_cmd->transport_state |= CMD_T_ABORTED; + + if ((tmr_sess != se_cmd->se_sess) && tas) + se_cmd->transport_state |= CMD_T_TAS; + spin_unlock(&se_cmd->t_state_lock); return kref_get_unless_zero(&se_cmd->cmd_kref); @@ -161,7 +175,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - if (!__target_check_io_state(se_cmd)) { + if (!__target_check_io_state(se_cmd, se_sess, 0)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_put_sess_cmd(se_cmd); goto out; @@ -230,7 +244,8 @@ static void core_tmr_drain_tmr_list( spin_lock(&sess->sess_cmd_lock); spin_lock(&cmd->t_state_lock); - if (!(cmd->transport_state & CMD_T_ACTIVE)) { + if (!(cmd->transport_state & CMD_T_ACTIVE) || + (cmd->transport_state & CMD_T_FABRIC_STOP)) { spin_unlock(&cmd->t_state_lock); spin_unlock(&sess->sess_cmd_lock); continue; @@ -240,15 +255,22 @@ static void core_tmr_drain_tmr_list( spin_unlock(&sess->sess_cmd_lock); continue; } + if (sess->sess_tearing_down || cmd->cmd_wait_set) { + spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); + continue; + } cmd->transport_state |= CMD_T_ABORTED; spin_unlock(&cmd->t_state_lock); rc = kref_get_unless_zero(&cmd->cmd_kref); - spin_unlock(&sess->sess_cmd_lock); if (!rc) { printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n"); + spin_unlock(&sess->sess_cmd_lock); continue; } + spin_unlock(&sess->sess_cmd_lock); + list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); } spin_unlock_irqrestore(&dev->se_tmr_lock, flags); @@ -325,7 +347,7 @@ static void core_tmr_drain_state_list( continue; spin_lock(&sess->sess_cmd_lock); - rc = __target_check_io_state(cmd); + rc = __target_check_io_state(cmd, tmr_sess, tas); spin_unlock(&sess->sess_cmd_lock); if (!rc) continue; @@ -364,7 +386,7 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(tmr_sess, cmd, tas); + core_tmr_handle_tas_abort(cmd, tas); target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 94e372af9e28..3441b159e306 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2431,18 +2431,33 @@ static void transport_write_pending_qf(struct se_cmd *cmd) } } +static bool +__transport_wait_for_tasks(struct se_cmd *, bool, bool *, bool *, + unsigned long *flags); + +static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas) +{ + unsigned long flags; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + __transport_wait_for_tasks(cmd, true, aborted, tas, &flags); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); +} + int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { int ret = 0; + bool aborted = false, tas = false; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); /* * Handle WRITE failure case where transport_generic_new_cmd() * has already added se_cmd to state_list, but fabric has @@ -2454,7 +2469,20 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) if (cmd->se_lun) transport_lun_remove_cmd(cmd); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); + } + /* + * If the task has been internally aborted due to TMR ABORT_TASK + * or LUN_RESET, target_core_tmr.c is responsible for performing + * the remaining calls to target_put_sess_cmd(), and not the + * callers of this function. + */ + if (aborted) { + pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag); + wait_for_completion(&cmd->cmd_wait_comp); + cmd->se_tfo->release_cmd(cmd); + ret = 1; } return ret; } @@ -2509,6 +2537,7 @@ static void target_release_cmd_kref(struct kref *kref) struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); struct se_session *se_sess = se_cmd->se_sess; unsigned long flags; + bool fabric_stop; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (list_empty(&se_cmd->se_cmd_list)) { @@ -2517,13 +2546,19 @@ static void target_release_cmd_kref(struct kref *kref) se_cmd->se_tfo->release_cmd(se_cmd); return; } - if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { + + spin_lock(&se_cmd->t_state_lock); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP); + spin_unlock(&se_cmd->t_state_lock); + + if (se_cmd->cmd_wait_set || fabric_stop) { + list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_free_cmd_mem(se_cmd); @@ -2555,6 +2590,7 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) { struct se_cmd *se_cmd; unsigned long flags; + int rc; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (se_sess->sess_tearing_down) { @@ -2564,8 +2600,15 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) se_sess->sess_tearing_down = 1; list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) - se_cmd->cmd_wait_set = 1; + list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) { + rc = kref_get_unless_zero(&se_cmd->cmd_kref); + if (rc) { + se_cmd->cmd_wait_set = 1; + spin_lock(&se_cmd->t_state_lock); + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + spin_unlock(&se_cmd->t_state_lock); + } + } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); } @@ -2578,15 +2621,25 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) { struct se_cmd *se_cmd, *tmp_cmd; unsigned long flags; + bool tas; list_for_each_entry_safe(se_cmd, tmp_cmd, &se_sess->sess_wait_list, se_cmd_list) { - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" " %d\n", se_cmd, se_cmd->t_state, se_cmd->se_tfo->get_cmd_state(se_cmd)); + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + tas = (se_cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + if (!target_put_sess_cmd(se_cmd)) { + if (tas) + target_put_sess_cmd(se_cmd); + } + wait_for_completion(&se_cmd->cmd_wait_comp); pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d" " fabric state: %d\n", se_cmd, se_cmd->t_state, @@ -2608,53 +2661,75 @@ void transport_clear_lun_ref(struct se_lun *lun) wait_for_completion(&lun->lun_ref_comp); } -/** - * transport_wait_for_tasks - wait for completion to occur - * @cmd: command to wait - * - * Called from frontend fabric context to wait for storage engine - * to pause and/or release frontend generated struct se_cmd. - */ -bool transport_wait_for_tasks(struct se_cmd *cmd) +static bool +__transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop, + bool *aborted, bool *tas, unsigned long *flags) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) { - unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + + if (fabric_stop) + cmd->transport_state |= CMD_T_FABRIC_STOP; + + if (cmd->transport_state & CMD_T_ABORTED) + *aborted = true; + + if (cmd->transport_state & CMD_T_TAS) + *tas = true; + if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) return false; - } if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) return false; - } - if (!(cmd->transport_state & CMD_T_ACTIVE)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + if (!(cmd->transport_state & CMD_T_ACTIVE)) + return false; + + if (fabric_stop && *aborted) return false; - } cmd->transport_state |= CMD_T_STOP; - pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d, t_state: %d, CMD_T_STOP\n", - cmd, cmd->tag, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); + pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d," + " t_state: %d, CMD_T_STOP\n", cmd, cmd->tag, + cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + spin_unlock_irqrestore(&cmd->t_state_lock, *flags); wait_for_completion(&cmd->t_transport_stop_comp); - spin_lock_irqsave(&cmd->t_state_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, *flags); cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); - pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->t_transport_stop_comp) for ITT: 0x%08llx\n", - cmd->tag); + pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->" + "t_transport_stop_comp) for ITT: 0x%08llx\n", cmd->tag); + return true; +} + +/** + * transport_wait_for_tasks - wait for completion to occur + * @cmd: command to wait + * + * Called from frontend fabric context to wait for storage engine + * to pause and/or release frontend generated struct se_cmd. + */ +bool transport_wait_for_tasks(struct se_cmd *cmd) +{ + unsigned long flags; + bool ret, aborted = false, tas = false; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + ret = __transport_wait_for_tasks(cmd, false, &aborted, &tas, &flags); spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return true; + return ret; } EXPORT_SYMBOL(transport_wait_for_tasks); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 1a76726c45a2..1579539e5669 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -493,6 +493,8 @@ struct se_cmd { #define CMD_T_DEV_ACTIVE (1 << 7) #define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) +#define CMD_T_TAS (1 << 10) +#define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; struct kref cmd_kref; struct completion t_transport_stop_comp; -- cgit v1.2.3 From 080fe2068e1c7f19f565b30b78baf78edf16a980 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 5 Feb 2016 15:36:41 -0800 Subject: mm, hugetlb: don't require CMA for runtime gigantic pages Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime") has added the runtime gigantic page allocation via alloc_contig_range(), making this support available only when CONFIG_CMA is enabled. Because it doesn't depend on MIGRATE_CMA pageblocks and the associated infrastructure, it is possible with few simple adjustments to require only CONFIG_MEMORY_ISOLATION instead of full CONFIG_CMA. After this patch, alloc_contig_range() and related functions are available and used for gigantic pages with just CONFIG_MEMORY_ISOLATION enabled. Note CONFIG_CMA selects CONFIG_MEMORY_ISOLATION. This allows supporting runtime gigantic pages without the CMA-specific checks in page allocator fastpaths. Signed-off-by: Vlastimil Babka Cc: Luiz Capitulino Cc: Kirill A. Shutemov Cc: Zhang Yanfei Cc: Yasuaki Ishimatsu Cc: Joonsoo Kim Cc: Naoya Horiguchi Cc: Mel Gorman Cc: Davidlohr Bueso Cc: Hillf Danton Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/hugetlbpage.c | 4 ++-- include/linux/gfp.h | 6 +++--- mm/hugetlb.c | 2 +- mm/page_alloc.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 42982b26e32b..740d7ac03a55 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt) } __setup("hugepagesz=", setup_hugepagesz); -#ifdef CONFIG_CMA +#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) static __init int gigantic_pages_init(void) { - /* With CMA we can allocate gigantic pages at runtime */ + /* With compaction or CMA we can allocate gigantic pages at runtime */ if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 28ad5f6494b0..af1f2b24bbe4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -547,16 +547,16 @@ static inline bool pm_suspended_storage(void) } #endif /* CONFIG_PM_SLEEP */ -#ifdef CONFIG_CMA - +#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype); extern void free_contig_range(unsigned long pfn, unsigned nr_pages); +#endif +#ifdef CONFIG_CMA /* CMA stuff */ extern void init_cma_reserved_pageblock(struct page *page); - #endif #endif /* __LINUX_GFP_H */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d7a802427ea8..06ae13e869d0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1001,7 +1001,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -#if defined(CONFIG_CMA) && defined(CONFIG_X86_64) +#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)) static void destroy_compound_gigantic_page(struct page *page, unsigned int order) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ea2c4d3e0c03..838ca8bb64f7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6620,7 +6620,7 @@ bool is_pageblock_removable_nolock(struct page *page) return !has_unmovable_pages(zone, page, 0, true); } -#ifdef CONFIG_CMA +#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) static unsigned long pfn_max_align_down(unsigned long pfn) { -- cgit v1.2.3 From 12352d3cae2cebe18805a91fab34b534d7444231 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Feb 2016 15:36:50 -0800 Subject: mm: replace vma_lock_anon_vma with anon_vma_lock_read/write Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if anon_vma appeared between lock and unlock. We have to check anon_vma first or call anon_vma_prepare() to be sure that it's here. There are only few users of these legacy helpers. Let's get rid of them. This patch fixes anon_vma lock imbalance in validate_mm(). Write lock isn't required here, read lock is enough. And reorders expand_downwards/expand_upwards: security_mmap_addr() and wrapping-around check don't have to be under anon vma lock. Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com Signed-off-by: Konstantin Khlebnikov Reported-by: Dmitry Vyukov Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 14 ------------- mm/mmap.c | 55 ++++++++++++++++++++++++---------------------------- 2 files changed, 25 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bdf597c4f0be..a07f42bedda3 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -109,20 +109,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma) __put_anon_vma(anon_vma); } -static inline void vma_lock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - down_write(&anon_vma->root->rwsem); -} - -static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - up_write(&anon_vma->root->rwsem); -} - static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { down_write(&anon_vma->root->rwsem); diff --git a/mm/mmap.c b/mm/mmap.c index 918c9ec5043f..2f2415a7a688 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -459,12 +459,16 @@ static void validate_mm(struct mm_struct *mm) struct vm_area_struct *vma = mm->mmap; while (vma) { + struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma_chain *avc; - vma_lock_anon_vma(vma); - list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) - anon_vma_interval_tree_verify(avc); - vma_unlock_anon_vma(vma); + if (anon_vma) { + anon_vma_lock_read(anon_vma); + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + anon_vma_interval_tree_verify(avc); + anon_vma_unlock_read(anon_vma); + } + highest_address = vma->vm_end; vma = vma->vm_next; i++; @@ -2145,32 +2149,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; - int error; + int error = 0; if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; - /* - * We must make sure the anon_vma is allocated - * so that the anon_vma locking is not a noop. - */ + /* Guard against wrapping around to address 0. */ + if (address < PAGE_ALIGN(address+4)) + address = PAGE_ALIGN(address+4); + else + return -ENOMEM; + + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; - vma_lock_anon_vma(vma); /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. - * Also guard against wrapping around to address 0. */ - if (address < PAGE_ALIGN(address+4)) - address = PAGE_ALIGN(address+4); - else { - vma_unlock_anon_vma(vma); - return -ENOMEM; - } - error = 0; + anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address > vma->vm_end) { @@ -2188,7 +2187,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) * updates, but we only hold a shared mmap_sem * lock here, so we need to protect against * concurrent vma expansions. - * vma_lock_anon_vma() doesn't help here, as + * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard @@ -2211,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } } } - vma_unlock_anon_vma(vma); + anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); return error; @@ -2227,25 +2226,21 @@ int expand_downwards(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; int error; - /* - * We must make sure the anon_vma is allocated - * so that the anon_vma locking is not a noop. - */ - if (unlikely(anon_vma_prepare(vma))) - return -ENOMEM; - address &= PAGE_MASK; error = security_mmap_addr(address); if (error) return error; - vma_lock_anon_vma(vma); + /* We must make sure the anon_vma is allocated. */ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ + anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address < vma->vm_start) { @@ -2263,7 +2258,7 @@ int expand_downwards(struct vm_area_struct *vma, * updates, but we only hold a shared mmap_sem * lock here, so we need to protect against * concurrent vma expansions. - * vma_lock_anon_vma() doesn't help here, as + * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard @@ -2284,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma, } } } - vma_unlock_anon_vma(vma); + anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); return error; -- cgit v1.2.3 From 732042821cfa106b3c20b9780e4c60fee9d68900 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Feb 2016 15:37:01 -0800 Subject: radix-tree: fix oops after radix_tree_iter_retry Helper radix_tree_iter_retry() resets next_index to the current index. In following radix_tree_next_slot current chunk size becomes zero. This isn't checked and it tries to dereference null pointer in slot. Tagged iterator is fine because retry happens only at slot 0 where tag bitmask in iter->tags is filled with single bit. Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup") Signed-off-by: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Jeremiah Mahler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 00b17c526c1f..f54be7082207 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -400,7 +400,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) * @iter: pointer to radix tree iterator * Returns: current chunk size */ -static __always_inline unsigned +static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; @@ -434,9 +434,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return slot + offset + 1; } } else { - unsigned size = radix_tree_chunk_size(iter) - 1; + long size = radix_tree_chunk_size(iter); - while (size--) { + while (--size > 0) { slot++; iter->index++; if (likely(*slot)) -- cgit v1.2.3 From 57dae19065bde296dfdf08b8e46c102a671ff741 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 29 Jan 2016 00:07:25 -0800 Subject: target: Drop legacy se_cmd->task_stop_comp + REQUEST_STOP usage With CMD_T_FABRIC_STOP + se_cmd->cmd_wait_set usage in place, go ahead and drop left-over CMD_T_REQUEST_STOP checks in target_complete_cmd() and unused target_stop_cmd(). Reviewed-by: Christoph Hellwig Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_internal.h | 1 - drivers/target/target_core_transport.c | 37 ---------------------------------- include/target/target_core_base.h | 4 ---- 3 files changed, 42 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index dae0750c2032..db4412fe6b8a 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -141,7 +141,6 @@ void transport_dump_vpd_proto_id(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); -bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags); void transport_clear_lun_ref(struct se_lun *); void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2e0b23a3d5c4..d92cb644d8f9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -692,15 +692,6 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) success = 1; } - /* - * See if we are waiting to complete for an exception condition. - */ - if (cmd->transport_state & CMD_T_REQUEST_STOP) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->task_stop_comp); - return; - } - /* * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. @@ -1202,7 +1193,6 @@ void transport_init_se_cmd( INIT_LIST_HEAD(&cmd->state_list); init_completion(&cmd->t_transport_stop_comp); init_completion(&cmd->cmd_wait_comp); - init_completion(&cmd->task_stop_comp); spin_lock_init(&cmd->t_state_lock); kref_init(&cmd->cmd_kref); cmd->transport_state = CMD_T_DEV_ACTIVE; @@ -1633,33 +1623,6 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, } EXPORT_SYMBOL(target_submit_tmr); -/* - * If the cmd is active, request it to be stopped and sleep until it - * has completed. - */ -bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags) - __releases(&cmd->t_state_lock) - __acquires(&cmd->t_state_lock) -{ - bool was_active = false; - - if (cmd->transport_state & CMD_T_BUSY) { - cmd->transport_state |= CMD_T_REQUEST_STOP; - spin_unlock_irqrestore(&cmd->t_state_lock, *flags); - - pr_debug("cmd %p waiting to complete\n", cmd); - wait_for_completion(&cmd->task_stop_comp); - pr_debug("cmd %p stopped successfully\n", cmd); - - spin_lock_irqsave(&cmd->t_state_lock, *flags); - cmd->transport_state &= ~CMD_T_REQUEST_STOP; - cmd->transport_state &= ~CMD_T_BUSY; - was_active = true; - } - - return was_active; -} - /* * Handle SAM-esque emulation for generic transport request failures. */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 1579539e5669..d71a3ead9e63 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -491,7 +491,6 @@ struct se_cmd { #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) #define CMD_T_DEV_ACTIVE (1 << 7) -#define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) @@ -514,9 +513,6 @@ struct se_cmd { struct list_head state_list; - /* old task stop completion, consider merging with some of the above */ - struct completion task_stop_comp; - /* backend private data */ void *priv; -- cgit v1.2.3 From 1f55c718c290616889c04946864a13ef30f64929 Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Thu, 14 Jan 2016 17:56:58 -0200 Subject: pty: make sure super_block is still valid in final /dev/tty close Considering current pty code and multiple devpts instances, it's possible to umount a devpts file system while a program still has /dev/tty opened pointing to a previosuly closed pty pair in that instance. In the case all ptmx and pts/N files are closed, umount can be done. If the program closes /dev/tty after umount is done, devpts_kill_index will use now an invalid super_block, which was already destroyed in the umount operation after running ->kill_sb. This is another "use after free" type of issue, but now related to the allocated super_block instance. To avoid the problem (warning at ida_remove and potential crashes) for this specific case, I added two functions in devpts which grabs additional references to the super_block, which pty code now uses so it makes sure the super block structure is still valid until pty shutdown is done. I also moved the additional inode references to the same functions, which also covered similar case with inode being freed before /dev/tty final close/shutdown. Signed-off-by: Herton R. Krzesinski Cc: stable@vger.kernel.org # 2.6.29+ Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 9 ++++++--- fs/devpts/inode.c | 20 ++++++++++++++++++++ include/linux/devpts_fs.h | 4 ++++ 3 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 3b5cde833ee5..2348fa613707 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -688,7 +688,7 @@ static void pty_unix98_shutdown(struct tty_struct *tty) else ptmx_inode = tty->link->driver_data; devpts_kill_index(ptmx_inode, tty->index); - iput(ptmx_inode); /* drop reference we acquired at ptmx_open */ + devpts_del_ref(ptmx_inode); } static const struct tty_operations ptm_unix98_ops = { @@ -785,9 +785,12 @@ static int ptmx_open(struct inode *inode, struct file *filp) * still have /dev/tty opened pointing to the master/slave pair (ptmx * is closed/released before /dev/tty), we must make sure that the inode * is still valid when we call the final pty_unix98_shutdown, thus we - * hold an additional reference to the ptmx inode + * hold an additional reference to the ptmx inode. For the same /dev/tty + * last close case, we also need to make sure the super_block isn't + * destroyed (devpts instance unmounted), before /dev/tty is closed and + * on its release devpts_kill_index is called. */ - ihold(inode); + devpts_add_ref(inode); tty_add_file(tty, filp); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 1f107fd51328..655f21f99160 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_unlock(&allocated_ptys_lock); } +/* + * pty code needs to hold extra references in case of last /dev/tty close + */ + +void devpts_add_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + atomic_inc(&sb->s_active); + ihold(ptmx_inode); +} + +void devpts_del_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + iput(ptmx_inode); + deactivate_super(sb); +} + /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 251a2090a554..e0ee0b3000b2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,8 @@ int devpts_new_index(struct inode *ptmx_inode); void devpts_kill_index(struct inode *ptmx_inode, int idx); +void devpts_add_ref(struct inode *ptmx_inode); +void devpts_del_ref(struct inode *ptmx_inode); /* mknod in devpts */ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv); @@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode); /* Dummy stubs in the no-pty case */ static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } +static inline void devpts_add_ref(struct inode *ptmx_inode) { } +static inline void devpts_del_ref(struct inode *ptmx_inode) { } static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv) { -- cgit v1.2.3 From 415e3d3e90ce9e18727e8843ae343eda5a58fad6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Feb 2016 02:11:03 +0100 Subject: unix: correctly track in-flight fds in sending process user_struct The commit referenced in the Fixes tag incorrectly accounted the number of in-flight fds over a unix domain socket to the original opener of the file-descriptor. This allows another process to arbitrary deplete the original file-openers resource limit for the maximum of open files. Instead the sending processes and its struct cred should be credited. To do so, we add a reference counted struct user_struct pointer to the scm_fp_list and use it to account for the number of inflight unix fds. Fixes: 712f4aad406bb1 ("unix: properly account for FDs passed over unix sockets") Reported-by: David Herrmann Cc: David Herrmann Cc: Willy Tarreau Cc: Linus Torvalds Suggested-by: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/af_unix.h | 4 ++-- include/net/scm.h | 1 + net/core/scm.c | 7 +++++++ net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 8 ++++---- 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2a91a0561a47..9b4c418bebd8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -6,8 +6,8 @@ #include #include -void unix_inflight(struct file *fp); -void unix_notinflight(struct file *fp); +void unix_inflight(struct user_struct *user, struct file *fp); +void unix_notinflight(struct user_struct *user, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/include/net/scm.h b/include/net/scm.h index 262532d111f5..59fa93c01d2a 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -21,6 +21,7 @@ struct scm_creds { struct scm_fp_list { short count; short max; + struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; diff --git a/net/core/scm.c b/net/core/scm.c index 14596fb37172..2696aefdc148 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fplp = fpl; fpl->count = 0; fpl->max = SCM_MAX_FD; + fpl->user = NULL; } fpp = &fpl->fp[fpl->count]; @@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fpp++ = file; fpl->count++; } + + if (!fpl->user) + fpl->user = get_uid(current_user()); + return num; } @@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm) scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); + free_uid(fpl->user); kfree(fpl); } } @@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; + new_fpl->user = get_uid(fpl->user); } return new_fpl; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 49d5093eb055..29be035f9c65 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) return -ENOMEM; for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8fcdc2283af5..6a0d48525fcf 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. */ -void unix_inflight(struct file *fp) +void unix_inflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -133,11 +133,11 @@ void unix_inflight(struct file *fp) } unix_tot_inflight++; } - fp->f_cred->user->unix_inflight++; + user->unix_inflight++; spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp) list_del_init(&u->link); unix_tot_inflight--; } - fp->f_cred->user->unix_inflight--; + user->unix_inflight--; spin_unlock(&unix_gc_lock); } -- cgit v1.2.3 From 9cf7490360bf2c46a16b7525f899e4970c5fc144 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Feb 2016 19:31:12 -0800 Subject: tcp: do not drop syn_recv on all icmp reports Petr Novopashenniy reported that ICMP redirects on SYN_RECV sockets were leading to RST. This is of course incorrect. A specific list of ICMP messages should be able to drop a SYN_RECV. For instance, a REDIRECT on SYN_RECV shall be ignored, as we do not hold a dst per SYN_RECV pseudo request. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111751 Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Reported-by: Petr Novopashenniy Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp_ipv4.c | 11 ++++++++--- net/ipv6/tcp_ipv6.c | 5 +++-- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f6f8f032c73e..ae6468f5c9f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -447,7 +447,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); -void tcp_req_err(struct sock *sk, u32 seq); +void tcp_req_err(struct sock *sk, u32 seq, bool abort); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a4d523709ab3..7f6ff037adaf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -311,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ -void tcp_req_err(struct sock *sk, u32 seq) +void tcp_req_err(struct sock *sk, u32 seq, bool abort) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); @@ -323,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { + } else if (abort) { /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly @@ -383,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } seq = ntohl(th->seq); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, + type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH))); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 006396e31cb0..1a5a70fb8551 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -327,6 +327,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; + bool fatal; int err; sk = __inet6_lookup_established(net, &tcp_hashinfo, @@ -345,8 +346,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } seq = ntohl(th->seq); + fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, fatal); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -400,7 +402,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { -- cgit v1.2.3 From 5f74f82ea34c0da80ea0b49192bb5ea06e063593 Mon Sep 17 00:00:00 2001 From: Hans Westgaard Ry Date: Wed, 3 Feb 2016 09:26:57 +0100 Subject: net:Add sysctl_max_skb_frags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Devices may have limits on the number of fragments in an skb they support. Current codebase uses a constant as maximum for number of fragments one skb can hold and use. When enabling scatter/gather and running traffic with many small messages the codebase uses the maximum number of fragments and may thereby violate the max for certain devices. The patch introduces a global variable as max number of fragments. Signed-off-by: Hans Westgaard Ry Reviewed-by: Håkon Bugge Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 2 ++ net/core/sysctl_net_core.c | 10 ++++++++++ net/ipv4/tcp.c | 4 ++-- 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11f935c1a090..4ce9ff7086f4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -299,6 +299,7 @@ struct sk_buff; #else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif +extern int sysctl_max_skb_frags; typedef struct skb_frag_struct skb_frag_t; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2df375ec9c2..5bf88f58bee7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,6 +79,8 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; +EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 95b6139d710c..a6beb7b6ae55 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,7 @@ static int zero = 0; static int one = 1; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; +static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "max_skb_frags", + .data = &sysctl_max_skb_frags, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &max_skb_frags, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 19746b3fcbbe..0c36ef4a3f86 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -940,7 +940,7 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1213,7 +1213,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } -- cgit v1.2.3 From 7e059158d57b79159eaf1f504825d19866ef2c42 Mon Sep 17 00:00:00 2001 From: David Wragg Date: Wed, 10 Feb 2016 00:05:58 +0000 Subject: vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices Prior to 4.3, openvswitch tunnel vports (vxlan, gre and geneve) could transmit vxlan packets of any size, constrained only by the ability to send out the resulting packets. 4.3 introduced netdevs corresponding to tunnel vports. These netdevs have an MTU, which limits the size of a packet that can be successfully encapsulated. The default MTU values are low (1500 or less), which is awkwardly small in the context of physical networks supporting jumbo frames, and leads to a conspicuous change in behaviour for userspace. Instead, set the MTU on openvswitch-created netdevs to be the relevant maximum (i.e. the maximum IP packet size minus any relevant overhead), effectively restoring the behaviour prior to 4.3. Signed-off-by: David Wragg Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++---- drivers/net/vxlan.c | 11 ++++++++--- include/net/ip_tunnels.h | 1 + net/ipv4/ip_gre.c | 8 ++++++++ net/ipv4/ip_tunnel.c | 20 +++++++++++++++++--- net/openvswitch/vport-vxlan.c | 2 ++ 6 files changed, 50 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d2031ce8baea..028e3873c310 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1453,11 +1453,21 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, err = geneve_configure(net, dev, &geneve_remote_unspec, 0, 0, 0, htons(dst_port), true, 0); - if (err) { - free_netdev(dev); - return ERR_PTR(err); - } + if (err) + goto err; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = geneve_change_mtu(dev, IP_MAX_MTU); + if (err) + goto err; + return dev; + + err: + free_netdev(dev); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e992c6a05f86..a31cd954b308 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2779,6 +2779,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, int err; bool use_ipv6 = false; __be16 default_port = vxlan->cfg.dst_port; + struct net_device *lowerdev = NULL; vxlan->net = src_net; @@ -2799,9 +2800,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, } if (conf->remote_ifindex) { - struct net_device *lowerdev - = __dev_get_by_index(src_net, conf->remote_ifindex); - + lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); dst->remote_ifindex = conf->remote_ifindex; if (!lowerdev) { @@ -2825,6 +2824,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, needed_headroom = lowerdev->hard_header_len; } + if (conf->mtu) { + err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false); + if (err) + return err; + } + if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) needed_headroom += VXLAN6_HEADROOM; else diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6db96ea0144f..dda9abf6b89c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, u8 *protocol, struct flowi4 *fl4); +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7c51c4e1661f..56fdf4e0dce4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1240,6 +1240,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c7bd72e9b544..89e8861e05fc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -943,17 +943,31 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1605691d9414..de9cb19efb6a 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -91,6 +91,8 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) struct vxlan_config conf = { .no_share = true, .flags = VXLAN_F_COLLECT_METADATA, + /* Don't restrict the packets that can be sent by MTU */ + .mtu = IP_MAX_MTU, }; if (!options) { -- cgit v1.2.3 From 73500267c930baadadb0d02284909731baf151f7 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:11 -0500 Subject: lib/ucs2_string: Add ucs2 -> utf8 helper functions This adds ucs2_utf8size(), which tells us how big our ucs2 string is in bytes, and ucs2_as_utf8, which translates from ucs2 to utf8.. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming --- include/linux/ucs2_string.h | 4 +++ lib/ucs2_string.c | 62 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cbb20afdbc01..bb679b48f408 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -11,4 +11,8 @@ unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); +unsigned long ucs2_utf8size(const ucs2_char_t *src); +unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, + unsigned long maxlength); + #endif /* _LINUX_UCS2_STRING_H_ */ diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 6f500ef2301d..17dd74e21ef9 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) } } EXPORT_SYMBOL(ucs2_strncmp); + +unsigned long +ucs2_utf8size(const ucs2_char_t *src) +{ + unsigned long i; + unsigned long j = 0; + + for (i = 0; i < ucs2_strlen(src); i++) { + u16 c = src[i]; + + if (c > 0x800) + j += 3; + else if (c > 0x80) + j += 2; + else + j += 1; + } + + return j; +} +EXPORT_SYMBOL(ucs2_utf8size); + +/* + * copy at most maxlength bytes of whole utf8 characters to dest from the + * ucs2 string src. + * + * The return value is the number of characters copied, not including the + * final NUL character. + */ +unsigned long +ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) +{ + unsigned int i; + unsigned long j = 0; + unsigned long limit = ucs2_strnlen(src, maxlength); + + for (i = 0; maxlength && i < limit; i++) { + u16 c = src[i]; + + if (c > 0x800) { + if (maxlength < 3) + break; + maxlength -= 3; + dest[j++] = 0xe0 | (c & 0xf000) >> 12; + dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x003f); + } else if (c > 0x80) { + if (maxlength < 2) + break; + maxlength -= 2; + dest[j++] = 0xc0 | (c & 0xfe0) >> 5; + dest[j++] = 0x80 | (c & 0x01f); + } else { + maxlength -= 1; + dest[j++] = c & 0x7f; + } + } + if (maxlength) + dest[j] = '\0'; + return j; +} +EXPORT_SYMBOL(ucs2_as_utf8); -- cgit v1.2.3 From 8282f5d9c17fe15a9e658c06e3f343efae1a2a2f Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:14 -0500 Subject: efi: Make our variable validation list include the guid All the variables in this list so far are defined to be in the global namespace in the UEFI spec, so this just further ensures we're validating the variables we think we are. Including the guid for entries will become more important in future patches when we decide whether or not to allow deletion of variables based on presence in this list. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming --- drivers/firmware/efi/efivars.c | 5 ++-- drivers/firmware/efi/vars.c | 52 +++++++++++++++++++++++++++--------------- include/linux/efi.h | 3 ++- 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index f4ff8abc5f3e..10e6774ab2a2 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -221,7 +221,7 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor, } if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(name, data, size) == false) { + efivar_validate(vendor, name, data, size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } @@ -447,7 +447,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, } if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(name, data, size) == false) { + efivar_validate(new_var->VendorGuid, name, data, + size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 5c5fde3e6c37..9a53da21e7b6 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -165,31 +165,42 @@ validate_ascii_string(efi_char16_t *var_name, int match, u8 *buffer, } struct variable_validate { + efi_guid_t vendor; char *name; bool (*validate)(efi_char16_t *var_name, int match, u8 *data, unsigned long len); }; +/* + * This is the list of variables we need to validate. + * + * If it has a validate() method that's not NULL, it'll go into the + * validation routine. If not, it is assumed valid. + * + * Note that it's sorted by {vendor,name}, but globbed names must come after + * any other name with the same prefix. + */ static const struct variable_validate variable_validate[] = { - { "BootNext", validate_uint16 }, - { "BootOrder", validate_boot_order }, - { "DriverOrder", validate_boot_order }, - { "Boot*", validate_load_option }, - { "Driver*", validate_load_option }, - { "ConIn", validate_device_path }, - { "ConInDev", validate_device_path }, - { "ConOut", validate_device_path }, - { "ConOutDev", validate_device_path }, - { "ErrOut", validate_device_path }, - { "ErrOutDev", validate_device_path }, - { "Timeout", validate_uint16 }, - { "Lang", validate_ascii_string }, - { "PlatformLang", validate_ascii_string }, - { "", NULL }, + { EFI_GLOBAL_VARIABLE_GUID, "BootNext", validate_uint16 }, + { EFI_GLOBAL_VARIABLE_GUID, "BootOrder", validate_boot_order }, + { EFI_GLOBAL_VARIABLE_GUID, "Boot*", validate_load_option }, + { EFI_GLOBAL_VARIABLE_GUID, "DriverOrder", validate_boot_order }, + { EFI_GLOBAL_VARIABLE_GUID, "Driver*", validate_load_option }, + { EFI_GLOBAL_VARIABLE_GUID, "ConIn", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ConInDev", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ConOut", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ConOutDev", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ErrOut", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ErrOutDev", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "Lang", validate_ascii_string }, + { EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string }, + { EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 }, + { NULL_GUID, "", NULL }, }; bool -efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long data_size) +efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, + unsigned long data_size) { int i; unsigned long utf8_size; @@ -203,9 +214,12 @@ efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long data_size) ucs2_as_utf8(utf8_name, var_name, utf8_size); utf8_name[utf8_size] = '\0'; - for (i = 0; variable_validate[i].validate != NULL; i++) { + for (i = 0; variable_validate[i].name[0] != '\0'; i++) { const char *name = variable_validate[i].name; - int match; + int match = 0; + + if (efi_guidcmp(vendor, variable_validate[i].vendor)) + continue; for (match = 0; ; match++) { char c = name[match]; @@ -862,7 +876,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, *set = false; - if (efivar_validate(name, data, *size) == false) + if (efivar_validate(*vendor, name, data, *size) == false) return -EINVAL; /* diff --git a/include/linux/efi.h b/include/linux/efi.h index 569b5a866bb1..16ca611aabc8 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1199,7 +1199,8 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, struct list_head *head, bool remove); -bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len); +bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, + unsigned long data_size); extern struct work_struct efivar_work; void efivar_run_worker(void); -- cgit v1.2.3 From ed8b0de5a33d2a2557dce7f9429dca8cb5bc5879 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:15 -0500 Subject: efi: Make efivarfs entries immutable by default "rm -rf" is bricking some peoples' laptops because of variables being used to store non-reinitializable firmware driver data that's required to POST the hardware. These are 100% bugs, and they need to be fixed, but in the mean time it shouldn't be easy to *accidentally* brick machines. We have to have delete working, and picking which variables do and don't work for deletion is quite intractable, so instead make everything immutable by default (except for a whitelist), and make tools that aren't quite so broad-spectrum unset the immutable flag. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming --- Documentation/filesystems/efivarfs.txt | 7 +++ drivers/firmware/efi/vars.c | 87 +++++++++++++++++++------- fs/efivarfs/file.c | 70 +++++++++++++++++++++ fs/efivarfs/inode.c | 30 +++++---- fs/efivarfs/internal.h | 3 +- fs/efivarfs/super.c | 9 ++- include/linux/efi.h | 2 + tools/testing/selftests/efivarfs/efivarfs.sh | 19 +++++- tools/testing/selftests/efivarfs/open-unlink.c | 72 ++++++++++++++++++++- 9 files changed, 258 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.txt index c477af086e65..686a64bba775 100644 --- a/Documentation/filesystems/efivarfs.txt +++ b/Documentation/filesystems/efivarfs.txt @@ -14,3 +14,10 @@ filesystem. efivarfs is typically mounted like this, mount -t efivarfs none /sys/firmware/efi/efivars + +Due to the presence of numerous firmware bugs where removing non-standard +UEFI variables causes the system firmware to fail to POST, efivarfs +files that are not well-known standardized variables are created +as immutable files. This doesn't prevent removal - "chattr -i" will work - +but it does prevent this kind of failure from being accomplished +accidentally. diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 9a53da21e7b6..50f10bad2604 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -172,10 +172,12 @@ struct variable_validate { }; /* - * This is the list of variables we need to validate. + * This is the list of variables we need to validate, as well as the + * whitelist for what we think is safe not to default to immutable. * * If it has a validate() method that's not NULL, it'll go into the - * validation routine. If not, it is assumed valid. + * validation routine. If not, it is assumed valid, but still used for + * whitelisting. * * Note that it's sorted by {vendor,name}, but globbed names must come after * any other name with the same prefix. @@ -193,11 +195,37 @@ static const struct variable_validate variable_validate[] = { { EFI_GLOBAL_VARIABLE_GUID, "ErrOut", validate_device_path }, { EFI_GLOBAL_VARIABLE_GUID, "ErrOutDev", validate_device_path }, { EFI_GLOBAL_VARIABLE_GUID, "Lang", validate_ascii_string }, + { EFI_GLOBAL_VARIABLE_GUID, "OsIndications", NULL }, { EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string }, { EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 }, { NULL_GUID, "", NULL }, }; +static bool +variable_matches(const char *var_name, size_t len, const char *match_name, + int *match) +{ + for (*match = 0; ; (*match)++) { + char c = match_name[*match]; + char u = var_name[*match]; + + /* Wildcard in the matching name means we've matched */ + if (c == '*') + return true; + + /* Case sensitive match */ + if (!c && *match == len) + return true; + + if (c != u) + return false; + + if (!c) + return true; + } + return true; +} + bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, unsigned long data_size) @@ -221,35 +249,48 @@ efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, if (efi_guidcmp(vendor, variable_validate[i].vendor)) continue; - for (match = 0; ; match++) { - char c = name[match]; - char u = utf8_name[match]; - - /* Wildcard in the matching name means we've matched */ - if (c == '*') { - kfree(utf8_name); - return variable_validate[i].validate(var_name, - match, data, data_size); - } - - /* Case sensitive match */ - if (c != u) + if (variable_matches(utf8_name, utf8_size+1, name, &match)) { + if (variable_validate[i].validate == NULL) break; - - /* Reached the end of the string while matching */ - if (!c) { - kfree(utf8_name); - return variable_validate[i].validate(var_name, - match, data, data_size); - } + kfree(utf8_name); + return variable_validate[i].validate(var_name, match, + data, data_size); } } - kfree(utf8_name); return true; } EXPORT_SYMBOL_GPL(efivar_validate); +bool +efivar_variable_is_removable(efi_guid_t vendor, const char *var_name, + size_t len) +{ + int i; + bool found = false; + int match = 0; + + /* + * Check if our variable is in the validated variables list + */ + for (i = 0; variable_validate[i].name[0] != '\0'; i++) { + if (efi_guidcmp(variable_validate[i].vendor, vendor)) + continue; + + if (variable_matches(var_name, len, + variable_validate[i].name, &match)) { + found = true; + break; + } + } + + /* + * If it's in our list, it is removable. + */ + return found; +} +EXPORT_SYMBOL_GPL(efivar_variable_is_removable); + static efi_status_t check_var_size(u32 attributes, unsigned long size) { diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index c424e4813ec8..d48e0d261d78 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "internal.h" @@ -103,9 +104,78 @@ out_free: return size; } +static int +efivarfs_ioc_getxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int i_flags; + unsigned int flags = 0; + + i_flags = inode->i_flags; + if (i_flags & S_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + + if (copy_to_user(arg, &flags, sizeof(flags))) + return -EFAULT; + return 0; +} + +static int +efivarfs_ioc_setxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int flags; + unsigned int i_flags = 0; + int error; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~FS_IMMUTABLE_FL) + return -EOPNOTSUPP; + + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + if (flags & FS_IMMUTABLE_FL) + i_flags |= S_IMMUTABLE; + + + error = mnt_want_write_file(file); + if (error) + return error; + + inode_lock(inode); + inode_set_flags(inode, i_flags, S_IMMUTABLE); + inode_unlock(inode); + + mnt_drop_write_file(file); + + return 0; +} + +long +efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p) +{ + void __user *arg = (void __user *)p; + + switch (cmd) { + case FS_IOC_GETFLAGS: + return efivarfs_ioc_getxflags(file, arg); + case FS_IOC_SETFLAGS: + return efivarfs_ioc_setxflags(file, arg); + } + + return -ENOTTY; +} + const struct file_operations efivarfs_file_operations = { .open = simple_open, .read = efivarfs_file_read, .write = efivarfs_file_write, .llseek = no_llseek, + .unlocked_ioctl = efivarfs_file_ioctl, }; diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 3381b9da9ee6..e2ab6d0497f2 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -15,7 +15,8 @@ #include "internal.h" struct inode *efivarfs_get_inode(struct super_block *sb, - const struct inode *dir, int mode, dev_t dev) + const struct inode *dir, int mode, + dev_t dev, bool is_removable) { struct inode *inode = new_inode(sb); @@ -23,6 +24,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb, inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_flags = is_removable ? 0 : S_IMMUTABLE; switch (mode & S_IFMT) { case S_IFREG: inode->i_fop = &efivarfs_file_operations; @@ -102,22 +104,17 @@ static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid) static int efivarfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct inode *inode; + struct inode *inode = NULL; struct efivar_entry *var; int namelen, i = 0, err = 0; + bool is_removable = false; if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len)) return -EINVAL; - inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0); - if (!inode) - return -ENOMEM; - var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL); - if (!var) { - err = -ENOMEM; - goto out; - } + if (!var) + return -ENOMEM; /* length of the variable name itself: remove GUID and separator */ namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; @@ -125,6 +122,16 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + if (efivar_variable_is_removable(var->var.VendorGuid, + dentry->d_name.name, namelen)) + is_removable = true; + + inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0, is_removable); + if (!inode) { + err = -ENOMEM; + goto out; + } + for (i = 0; i < namelen; i++) var->var.VariableName[i] = dentry->d_name.name[i]; @@ -138,7 +145,8 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, out: if (err) { kfree(var); - iput(inode); + if (inode) + iput(inode); } return err; } diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index b5ff16addb7c..b4505188e799 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -15,7 +15,8 @@ extern const struct file_operations efivarfs_file_operations; extern const struct inode_operations efivarfs_dir_inode_operations; extern bool efivarfs_valid_name(const char *str, int len); extern struct inode *efivarfs_get_inode(struct super_block *sb, - const struct inode *dir, int mode, dev_t dev); + const struct inode *dir, int mode, dev_t dev, + bool is_removable); extern struct list_head efivarfs_list; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 8651ac28ec0d..dd029d13ea61 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -120,6 +120,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, char *name; int len; int err = -ENOMEM; + bool is_removable = false; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -137,13 +138,17 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, ucs2_as_utf8(name, entry->var.VariableName, len); + if (efivar_variable_is_removable(entry->var.VendorGuid, name, len)) + is_removable = true; + name[len] = '-'; efi_guid_to_str(&entry->var.VendorGuid, name + len + 1); name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; - inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0); + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, + is_removable); if (!inode) goto fail_name; @@ -199,7 +204,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; - inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0); + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true); if (!inode) return -ENOMEM; inode->i_op = &efivarfs_dir_inode_operations; diff --git a/include/linux/efi.h b/include/linux/efi.h index 16ca611aabc8..47be3ad7d3e5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1201,6 +1201,8 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, unsigned long data_size); +bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, + size_t len); extern struct work_struct efivar_work; void efivar_run_worker(void); diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 77edcdcc016b..057278448515 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -88,7 +88,11 @@ test_delete() exit 1 fi - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi if [ -e $file ]; then echo "$file couldn't be deleted" >&2 @@ -111,6 +115,7 @@ test_zero_size_delete() exit 1 fi + chattr -i $file printf "$attrs" > $file if [ -e $file ]; then @@ -141,7 +146,11 @@ test_valid_filenames() echo "$file could not be created" >&2 ret=1 else - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi fi done @@ -174,7 +183,11 @@ test_invalid_filenames() if [ -e $file ]; then echo "Creating $file should have failed" >&2 - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi ret=1 fi done diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c index 8c0764407b3c..4af74f733036 100644 --- a/tools/testing/selftests/efivarfs/open-unlink.c +++ b/tools/testing/selftests/efivarfs/open-unlink.c @@ -1,10 +1,68 @@ +#include #include #include #include #include +#include #include #include #include +#include + +static int set_immutable(const char *path, int immutable) +{ + unsigned int flags; + int fd; + int rc; + int error; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + rc = ioctl(fd, FS_IOC_GETFLAGS, &flags); + if (rc < 0) { + error = errno; + close(fd); + errno = error; + return rc; + } + + if (immutable) + flags |= FS_IMMUTABLE_FL; + else + flags &= ~FS_IMMUTABLE_FL; + + rc = ioctl(fd, FS_IOC_SETFLAGS, &flags); + error = errno; + close(fd); + errno = error; + return rc; +} + +static int get_immutable(const char *path) +{ + unsigned int flags; + int fd; + int rc; + int error; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + rc = ioctl(fd, FS_IOC_GETFLAGS, &flags); + if (rc < 0) { + error = errno; + close(fd); + errno = error; + return rc; + } + close(fd); + if (flags & FS_IMMUTABLE_FL) + return 1; + return 0; +} int main(int argc, char **argv) { @@ -27,7 +85,7 @@ int main(int argc, char **argv) buf[4] = 0; /* create a test variable */ - fd = open(path, O_WRONLY | O_CREAT); + fd = open(path, O_WRONLY | O_CREAT, 0600); if (fd < 0) { perror("open(O_WRONLY)"); return EXIT_FAILURE; @@ -41,6 +99,18 @@ int main(int argc, char **argv) close(fd); + rc = get_immutable(path); + if (rc < 0) { + perror("ioctl(FS_IOC_GETFLAGS)"); + return EXIT_FAILURE; + } else if (rc) { + rc = set_immutable(path, 0); + if (rc < 0) { + perror("ioctl(FS_IOC_SETFLAGS)"); + return EXIT_FAILURE; + } + } + fd = open(path, O_RDONLY); if (fd < 0) { perror("open"); -- cgit v1.2.3 From 9095adaab8c1d82707e4e9961b6ad79b62f3361b Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 10 Feb 2016 18:59:13 -0500 Subject: target/transport: add flag to indicate CPU Affinity is observed Signed-off-by: Quinn Tran Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Fixes: fb3269b ("qla2xxx: Add selective command queuing") Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 12 +++++++++--- include/target/target_core_base.h | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d92cb644d8f9..867bc6d0a68a 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -711,10 +711,10 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE); spin_unlock_irqrestore(&cmd->t_state_lock, flags); - if (cmd->cpuid == -1) - queue_work(target_completion_wq, &cmd->work); - else + if (cmd->se_cmd_flags & SCF_USE_CPUID) queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work); + else + queue_work(target_completion_wq, &cmd->work); } EXPORT_SYMBOL(target_complete_cmd); @@ -1426,6 +1426,12 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess */ transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, data_length, data_dir, task_attr, sense); + + if (flags & TARGET_SCF_USE_CPUID) + se_cmd->se_cmd_flags |= SCF_USE_CPUID; + else + se_cmd->cpuid = WORK_CPU_UNBOUND; + if (flags & TARGET_SCF_UNKNOWN_SIZE) se_cmd->unknown_data_length = 1; /* diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index d71a3ead9e63..e8c8c08bf575 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -141,6 +141,7 @@ enum se_cmd_flags_table { SCF_COMPARE_AND_WRITE_POST = 0x00100000, SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000, SCF_ACK_KREF = 0x00400000, + SCF_USE_CPUID = 0x00800000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ @@ -188,6 +189,7 @@ enum target_sc_flags_table { TARGET_SCF_BIDI_OP = 0x01, TARGET_SCF_ACK_KREF = 0x02, TARGET_SCF_UNKNOWN_SIZE = 0x04, + TARGET_SCF_USE_CPUID = 0x08, }; /* fabric independent task management function values */ -- cgit v1.2.3 From 4a389810bc3cb0e73443104f0827e81e23cb1e12 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 11 Feb 2016 16:13:14 -0800 Subject: kernel/locking/lockdep.c: convert hash tables to hlists Mike said: : CONFIG_UBSAN_ALIGNMENT breaks x86-64 kernel with lockdep enabled, i. e : kernel with CONFIG_UBSAN_ALIGNMENT fails to load without even any error : message. : : The problem is that ubsan callbacks use spinlocks and might be called : before lockdep is initialized. Particularly this line in the : reserve_ebda_region function causes problem: : : lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); : : If i put lockdep_init() before reserve_ebda_region call in : x86_64_start_reservations kernel loads well. Fix this ordering issue permanently: change lockdep so that it uses hlists for the hash tables. Unlike a list_head, an hlist_head is in its initialized state when it is all-zeroes, so lockdep is ready for operation immediately upon boot - lockdep_init() need not have run. The patch will also save some memory. lockdep_init() and lockdep_initialized can be done away with now - a 4.6 patch has been prepared to do this. Reported-by: Mike Krinkin Suggested-by: Mike Krinkin Cc: Andrey Ryabinin Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 4 ++-- kernel/locking/lockdep.c | 42 +++++++++++++++++++----------------------- 2 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index c57e424d914b..4dca42fd32f5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -66,7 +66,7 @@ struct lock_class { /* * class-hash: */ - struct list_head hash_entry; + struct hlist_node hash_entry; /* * global list of all lock-classes: @@ -199,7 +199,7 @@ struct lock_chain { u8 irq_context; u8 depth; u16 base; - struct list_head entry; + struct hlist_node entry; u64 chain_key; }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 60ace56618f6..7537e568dabe 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -292,7 +292,7 @@ LIST_HEAD(all_lock_classes); #define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) #define classhashentry(key) (classhash_table + __classhashfn((key))) -static struct list_head classhash_table[CLASSHASH_SIZE]; +static struct hlist_head classhash_table[CLASSHASH_SIZE]; /* * We put the lock dependency chains into a hash-table as well, to cache @@ -303,7 +303,7 @@ static struct list_head classhash_table[CLASSHASH_SIZE]; #define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) -static struct list_head chainhash_table[CHAINHASH_SIZE]; +static struct hlist_head chainhash_table[CHAINHASH_SIZE]; /* * The hash key of the lock dependency chains is a hash itself too: @@ -666,7 +666,7 @@ static inline struct lock_class * look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; - struct list_head *hash_head; + struct hlist_head *hash_head; struct lock_class *class; #ifdef CONFIG_DEBUG_LOCKDEP @@ -719,7 +719,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return NULL; - list_for_each_entry_rcu(class, hash_head, hash_entry) { + hlist_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -742,7 +742,7 @@ static inline struct lock_class * register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) { struct lockdep_subclass_key *key; - struct list_head *hash_head; + struct hlist_head *hash_head; struct lock_class *class; DEBUG_LOCKS_WARN_ON(!irqs_disabled()); @@ -774,7 +774,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * We have to do the hash-walk again, to avoid races * with another CPU: */ - list_for_each_entry_rcu(class, hash_head, hash_entry) { + hlist_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) goto out_unlock_set; } @@ -805,7 +805,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * We use RCU's safe list-add method to make * parallel walking of the hash-list safe: */ - list_add_tail_rcu(&class->hash_entry, hash_head); + hlist_add_head_rcu(&class->hash_entry, hash_head); /* * Add it to the global list of classes: */ @@ -2017,7 +2017,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, u64 chain_key) { struct lock_class *class = hlock_class(hlock); - struct list_head *hash_head = chainhashentry(chain_key); + struct hlist_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; struct held_lock *hlock_curr; int i, j; @@ -2033,7 +2033,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, * We can walk it lock-free, because entries only get added * to the hash: */ - list_for_each_entry_rcu(chain, hash_head, entry) { + hlist_for_each_entry_rcu(chain, hash_head, entry) { if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(chain_lookup_hits); @@ -2057,7 +2057,7 @@ cache_hit: /* * We have to walk the chain again locked - to avoid duplicates: */ - list_for_each_entry(chain, hash_head, entry) { + hlist_for_each_entry(chain, hash_head, entry) { if (chain->chain_key == chain_key) { graph_unlock(); goto cache_hit; @@ -2091,7 +2091,7 @@ cache_hit: } chain_hlocks[chain->base + j] = class - lock_classes; } - list_add_tail_rcu(&chain->entry, hash_head); + hlist_add_head_rcu(&chain->entry, hash_head); debug_atomic_inc(chain_lookup_misses); inc_chains(); @@ -3875,7 +3875,7 @@ void lockdep_reset(void) nr_process_chains = 0; debug_locks = 1; for (i = 0; i < CHAINHASH_SIZE; i++) - INIT_LIST_HEAD(chainhash_table + i); + INIT_HLIST_HEAD(chainhash_table + i); raw_local_irq_restore(flags); } @@ -3894,7 +3894,7 @@ static void zap_class(struct lock_class *class) /* * Unhash the class and remove it from the all_lock_classes list: */ - list_del_rcu(&class->hash_entry); + hlist_del_rcu(&class->hash_entry); list_del_rcu(&class->lock_entry); RCU_INIT_POINTER(class->key, NULL); @@ -3917,7 +3917,7 @@ static inline int within(const void *addr, void *start, unsigned long size) void lockdep_free_key_range(void *start, unsigned long size) { struct lock_class *class; - struct list_head *head; + struct hlist_head *head; unsigned long flags; int i; int locked; @@ -3930,9 +3930,7 @@ void lockdep_free_key_range(void *start, unsigned long size) */ for (i = 0; i < CLASSHASH_SIZE; i++) { head = classhash_table + i; - if (list_empty(head)) - continue; - list_for_each_entry_rcu(class, head, hash_entry) { + hlist_for_each_entry_rcu(class, head, hash_entry) { if (within(class->key, start, size)) zap_class(class); else if (within(class->name, start, size)) @@ -3962,7 +3960,7 @@ void lockdep_free_key_range(void *start, unsigned long size) void lockdep_reset_lock(struct lockdep_map *lock) { struct lock_class *class; - struct list_head *head; + struct hlist_head *head; unsigned long flags; int i, j; int locked; @@ -3987,9 +3985,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) locked = graph_lock(); for (i = 0; i < CLASSHASH_SIZE; i++) { head = classhash_table + i; - if (list_empty(head)) - continue; - list_for_each_entry_rcu(class, head, hash_entry) { + hlist_for_each_entry_rcu(class, head, hash_entry) { int match = 0; for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) @@ -4027,10 +4023,10 @@ void lockdep_init(void) return; for (i = 0; i < CLASSHASH_SIZE; i++) - INIT_LIST_HEAD(classhash_table + i); + INIT_HLIST_HEAD(classhash_table + i); for (i = 0; i < CHAINHASH_SIZE; i++) - INIT_LIST_HEAD(chainhash_table + i); + INIT_HLIST_HEAD(chainhash_table + i); lockdep_initialized = 1; } -- cgit v1.2.3 From db78c22230d0bcc8b27b81f05b39f104f08232c5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 11 Feb 2016 16:13:17 -0800 Subject: mm: fix pfn_t vs highmem The pfn_t type uses an unsigned long to store a pfn + flags value. On a 64-bit platform the upper 12 bits of an unsigned long are never used for storing the value of a pfn. However, this is not true on highmem platforms, all 32-bits of a pfn value are used to address a 44-bit physical address space. A pfn_t needs to store a 64-bit value. Link: https://bugzilla.kernel.org/show_bug.cgi?id=112211 Fixes: 01c8f1c44b83 ("mm, dax, gpu: convert vm_insert_mixed to pfn_t") Signed-off-by: Dan Williams Reported-by: Stuart Foster Reported-by: Julian Margetson Tested-by: Julian Margetson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pfn.h | 2 +- include/linux/pfn_t.h | 19 +++++++++---------- kernel/memremap.c | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 2d8e49711b63..1132953235c0 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -10,7 +10,7 @@ * backing is indicated by flags in the high bits of the value. */ typedef struct { - unsigned long val; + u64 val; } pfn_t; #endif diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 37448ab5fb5c..94994810c7c0 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -9,14 +9,13 @@ * PFN_DEV - pfn is not covered by system memmap by default * PFN_MAP - pfn has a dynamic page mapping established by a device driver */ -#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \ - << (BITS_PER_LONG - PAGE_SHIFT)) -#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1)) -#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2)) -#define PFN_DEV (1UL << (BITS_PER_LONG - 3)) -#define PFN_MAP (1UL << (BITS_PER_LONG - 4)) - -static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags) +#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) +#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) +#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) +#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) +#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) + +static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) { pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; @@ -29,7 +28,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn) return __pfn_to_pfn_t(pfn, 0); } -extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags); +extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags); static inline bool pfn_t_has_page(pfn_t pfn) { @@ -87,7 +86,7 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) #ifdef __HAVE_ARCH_PTE_DEVMAP static inline bool pfn_t_devmap(pfn_t pfn) { - const unsigned long flags = PFN_DEV|PFN_MAP; + const u64 flags = PFN_DEV|PFN_MAP; return (pfn.val & flags) == flags; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 70ee3775de24..2c468dea60bc 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr) } EXPORT_SYMBOL(devm_memunmap); -pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags) +pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) { return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); } -- cgit v1.2.3 From c777e2a8b65420b31dac28a453e35be984f5808b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 9 Feb 2016 06:50:31 +0530 Subject: powerpc/mm: Fix Multi hit ERAT cause by recent THP update With ppc64 we use the deposited pgtable_t to store the hash pte slot information. We should not withdraw the deposited pgtable_t without marking the pmd none. This ensure that low level hash fault handling will skip this huge pte and we will handle them at upper levels. Recent change to pmd splitting changed the above in order to handle the race between pmd split and exit_mmap. The race is explained below. Consider following race: CPU0 CPU1 shrink_page_list() add_to_swap() split_huge_page_to_list() __split_huge_pmd_locked() pmdp_huge_clear_flush_notify() // pmd_none() == true exit_mmap() unmap_vmas() zap_pmd_range() // no action on pmd since pmd_none() == true pmd_populate() As result the THP will not be freed. The leak is detected by check_mm(): BUG: Bad rss-counter state mm:ffff880058d2e580 idx:1 val:512 The above required us to not mark pmd none during a pmd split. The fix for ppc is to clear the huge pte of _PAGE_USER, so that low level fault handling code skip this pte. At higher level we do take ptl lock. That should serialze us against the pmd split. Once the lock is acquired we do check the pmd again using pmd_same. That should always return false for us and hence we should retry the access. We do the pmd_same check in all case after taking plt with THP (do_huge_pmd_wp_page, do_huge_pmd_numa_page and huge_pmd_set_accessed) Also make sure we wait for irq disable section in other cpus to finish before flipping a huge pte entry with a regular pmd entry. Code paths like find_linux_pte_or_hugepte depend on irq disable to get a stable pte_t pointer. A parallel thp split need to make sure we don't convert a pmd pte to a regular pmd entry without waiting for the irq disable section to finish. Fixes: eef1b3ba053a ("thp: implement split_huge_pmd()") Acked-by: Kirill A. Shutemov Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++++ arch/powerpc/mm/pgtable_64.c | 32 ++++++++++++++++++++++++++++ include/asm-generic/pgtable.h | 8 +++++++ mm/huge_memory.c | 1 + 4 files changed, 45 insertions(+) (limited to 'include') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 8d1c41d28318..ac07a30a7934 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 3124a20d0fab..cdf2123d46db 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } +void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); + + /* + * We can't mark the pmd none here, because that will cause a race + * against exit_mmap. We need to continue mark pmd TRANS HUGE, while + * we spilt, but at the same time we wan't rest of the ppc64 code + * not to insert hash pte on this, because we will be modifying + * the deposited pgtable in the caller of this function. Hence + * clear the _PAGE_USER so that we move the fault handling to + * higher level function and that will serialize against ptl. + * We need to flush existing hash pte entries here even though, + * the translation is still valid, because we will withdraw + * pgtable_t after this. + */ + pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); +} + + /* * set a new huge pmd. We should not be called for updating * an existing pmd entry. That should go via pmd_hugepage_update. @@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } +/* + * We use this to invalidate a pmdp entry before switching from a + * hugepte to regular pmd entry. + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); + + /* + * This ensures that generic code that rely on IRQ disabling + * to prevent a parallel THP split work as expected. + */ + kick_all_cpus_sync(); } /* diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 0b3c0d39ef75..c370b261c720 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -239,6 +239,14 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif +#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE +static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + +} +#endif + #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b1cf73bc3b12..de3f43cde129 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2856,6 +2856,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, young = pmd_young(*pmd); dirty = pmd_dirty(*pmd); + pmdp_huge_split_prepare(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); -- cgit v1.2.3 From 46924008273ed03bd11dbb32136e3da4cfe056e1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 15 Feb 2016 12:42:38 +0000 Subject: iommu/vt-d: Clear PPR bit to ensure we get more page request interrupts According to the VT-d specification we need to clear the PPR bit in the Page Request Status register when handling page requests, or the hardware won't generate any more interrupts. This wasn't actually necessary on SKL/KBL (which may well be the subject of a hardware erratum, although it's harmless enough). But other implementations do appear to get it right, and we only ever get one interrupt unless we clear the PPR bit. Reported-by: CQ Tang Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org --- drivers/iommu/intel-svm.c | 4 ++++ include/linux/intel-iommu.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 97a818992d6d..d9939fa9b588 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -524,6 +524,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_svm *svm = NULL; int head, tail, handled = 0; + /* Clear PPR bit before reading head/tail registers, to + * ensure that we get a new interrupt if needed. */ + writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; while (head != tail) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 821273ca4873..2d9b650047a5 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -235,6 +235,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* low 64 bit */ #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) +/* PRS_REG */ +#define DMA_PRS_PPR ((u32)1) + #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ do { \ cycles_t start_time = get_cycles(); \ -- cgit v1.2.3 From f37755490fe9bf76f6ba1d8c6591745d3574a6a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 15 Feb 2016 12:36:14 -0500 Subject: tracepoints: Do not trace when cpu is offline The tracepoint infrastructure uses RCU sched protection to enable and disable tracepoints safely. There are some instances where tracepoints are used in infrastructure code (like kfree()) that get called after a CPU is going offline, and perhaps when it is coming back online but hasn't been registered yet. This can probuce the following warning: [ INFO: suspicious RCU usage. ] 4.4.0-00006-g0fe53e8-dirty #34 Tainted: G S ------------------------------- include/trace/events/kmem.h:141 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/8/0. stack backtrace: CPU: 8 PID: 0 Comm: swapper/8 Tainted: G S 4.4.0-00006-g0fe53e8-dirty #34 Call Trace: [c0000005b76c78d0] [c0000000008b9540] .dump_stack+0x98/0xd4 (unreliable) [c0000005b76c7950] [c00000000010c898] .lockdep_rcu_suspicious+0x108/0x170 [c0000005b76c79e0] [c00000000029adc0] .kfree+0x390/0x440 [c0000005b76c7a80] [c000000000055f74] .destroy_context+0x44/0x100 [c0000005b76c7b00] [c0000000000934a0] .__mmdrop+0x60/0x150 [c0000005b76c7b90] [c0000000000e3ff0] .idle_task_exit+0x130/0x140 [c0000005b76c7c20] [c000000000075804] .pseries_mach_cpu_die+0x64/0x310 [c0000005b76c7cd0] [c000000000043e7c] .cpu_die+0x3c/0x60 [c0000005b76c7d40] [c0000000000188d8] .arch_cpu_idle_dead+0x28/0x40 [c0000005b76c7db0] [c000000000101e6c] .cpu_startup_entry+0x50c/0x560 [c0000005b76c7ed0] [c000000000043bd8] .start_secondary+0x328/0x360 [c0000005b76c7f90] [c000000000008a6c] start_secondary_prolog+0x10/0x14 This warning is not a false positive either. RCU is not protecting code that is being executed while the CPU is offline. Instead of playing "whack-a-mole(TM)" and adding conditional statements to the tracepoints we find that are used in this instance, simply add a cpu_online() test to the tracepoint code where the tracepoint will be ignored if the CPU is offline. Use of raw_smp_processor_id() is fine, as there should never be a case where the tracepoint code goes from running on a CPU that is online and suddenly gets migrated to a CPU that is offline. Link: http://lkml.kernel.org/r/1455387773-4245-1-git-send-email-kda@linux-powerpc.org Reported-by: Denis Kirjanov Fixes: 97e1c18e8d17b ("tracing: Kernel Tracepoints") Cc: stable@vger.kernel.org # v2.6.28+ Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index acd522a91539..acfdbf353a0b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -14,8 +14,10 @@ * See the file COPYING for more details. */ +#include #include #include +#include #include #include @@ -132,6 +134,9 @@ extern void syscall_unregfunc(void); void *it_func; \ void *__data; \ \ + if (!cpu_online(raw_smp_processor_id())) \ + return; \ + \ if (!(cond)) \ return; \ prercu; \ -- cgit v1.2.3 From b33c8ff4431a343561e2319f17c14286f2aa52e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Feb 2016 22:26:42 +0100 Subject: tracing: Fix freak link error caused by branch tracer In my randconfig tests, I came across a bug that involves several components: * gcc-4.9 through at least 5.3 * CONFIG_GCOV_PROFILE_ALL enabling -fprofile-arcs for all files * CONFIG_PROFILE_ALL_BRANCHES overriding every if() * The optimized implementation of do_div() that tries to replace a library call with an division by multiplication * code in drivers/media/dvb-frontends/zl10353.c doing u32 adc_clock = 450560; /* 45.056 MHz */ if (state->config.adc_clock) adc_clock = state->config.adc_clock; do_div(value, adc_clock); In this case, gcc fails to determine whether the divisor in do_div() is __builtin_constant_p(). In particular, it concludes that __builtin_constant_p(adc_clock) is false, while __builtin_constant_p(!!adc_clock) is true. That in turn throws off the logic in do_div() that also uses __builtin_constant_p(), and instead of picking either the constant- optimized division, and the code in ilog2() that uses __builtin_constant_p() to figure out whether it knows the answer at compile time. The result is a link error from failing to find multiple symbols that should never have been called based on the __builtin_constant_p(): dvb-frontends/zl10353.c:138: undefined reference to `____ilog2_NaN' dvb-frontends/zl10353.c:138: undefined reference to `__aeabi_uldivmod' ERROR: "____ilog2_NaN" [drivers/media/dvb-frontends/zl10353.ko] undefined! ERROR: "__aeabi_uldivmod" [drivers/media/dvb-frontends/zl10353.ko] undefined! This patch avoids the problem by changing __trace_if() to check whether the condition is known at compile-time to be nonzero, rather than checking whether it is actually a constant. I see this one link error in roughly one out of 1600 randconfig builds on ARM, and the patch fixes all known instances. Link: http://lkml.kernel.org/r/1455312410-1058841-1-git-send-email-arnd@arndb.de Acked-by: Nicolas Pitre Fixes: ab3c9c686e22 ("branch tracer, intel-iommu: fix build with CONFIG_BRANCH_TRACER=y") Cc: stable@vger.kernel.org # v2.6.30+ Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 00b042c49ccd..48f5aab117ae 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,7 +144,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); */ #define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) ) #define __trace_if(cond) \ - if (__builtin_constant_p((cond)) ? !!(cond) : \ + if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ({ \ int ______r; \ static struct ftrace_branch_data \ -- cgit v1.2.3 From b4ff3a36d3e409d365a09b6b783ff895063ff4ef Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 9 Feb 2016 14:57:42 +0200 Subject: net/mlx5: Use offset based reserved field names in the IFC header file mlx5_ifc.h is a header file representing the API and ABI between the driver to the firmware and hardware. This file is used from both the mlx5_ib and mlx5_core drivers. Previously, this file used incrementing counter to indicate reserved fields, for example: struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 send[0x1]; u8 receive[0x1]; u8 write[0x1]; u8 read[0x1]; u8 reserved_0[0x1]; u8 srq_receive[0x1]; u8 reserved_1[0x1a]; }; If one developer implements through net-next feature A that uses reserved_0, they replace it with featureA and renames reserved_1 to reserved_0. In the same kernel cycle, a 2nd developer could implement feature B through the rdma tree, that uses reserved_1 and split it to featureB and a smaller reserved_1 field. This will cause a conflict when the two trees are merged. The source of this conflict is that the 1st developer changed *all* reserved fields. As Linus suggested, we change the layout of structs to: struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 send[0x1]; u8 receive[0x1]; u8 write[0x1]; u8 read[0x1]; u8 reserved_at_4[0x1]; u8 srq_receive[0x1]; u8 reserved_at_6[0x1a]; }; This makes the conflicts much more rare and preserves the locality of changes. Signed-off-by: Matan Barak Signed-off-by: Alaa Hleihel Reported-by: Linus Torvalds Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 2968 ++++++++++++++++++++--------------------- 1 file changed, 1484 insertions(+), 1484 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 231ab6bcea76..51f1e540fc2b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -207,15 +207,15 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; u8 outer_ether_type[0x1]; - u8 reserved_0[0x1]; + u8 reserved_at_3[0x1]; u8 outer_first_prio[0x1]; u8 outer_first_cfi[0x1]; u8 outer_first_vid[0x1]; - u8 reserved_1[0x1]; + u8 reserved_at_7[0x1]; u8 outer_second_prio[0x1]; u8 outer_second_cfi[0x1]; u8 outer_second_vid[0x1]; - u8 reserved_2[0x1]; + u8 reserved_at_b[0x1]; u8 outer_sip[0x1]; u8 outer_dip[0x1]; u8 outer_frag[0x1]; @@ -230,21 +230,21 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_gre_protocol[0x1]; u8 outer_gre_key[0x1]; u8 outer_vxlan_vni[0x1]; - u8 reserved_3[0x5]; + u8 reserved_at_1a[0x5]; u8 source_eswitch_port[0x1]; u8 inner_dmac[0x1]; u8 inner_smac[0x1]; u8 inner_ether_type[0x1]; - u8 reserved_4[0x1]; + u8 reserved_at_23[0x1]; u8 inner_first_prio[0x1]; u8 inner_first_cfi[0x1]; u8 inner_first_vid[0x1]; - u8 reserved_5[0x1]; + u8 reserved_at_27[0x1]; u8 inner_second_prio[0x1]; u8 inner_second_cfi[0x1]; u8 inner_second_vid[0x1]; - u8 reserved_6[0x1]; + u8 reserved_at_2b[0x1]; u8 inner_sip[0x1]; u8 inner_dip[0x1]; u8 inner_frag[0x1]; @@ -256,37 +256,37 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_sport[0x1]; u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; - u8 reserved_7[0x9]; + u8 reserved_at_37[0x9]; - u8 reserved_8[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; - u8 reserved_0[0x2]; + u8 reserved_at_1[0x2]; u8 flow_modify_en[0x1]; u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 reserved_1[0x19]; + u8 reserved_at_7[0x19]; - u8 reserved_2[0x2]; + u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; - u8 reserved_3[0x10]; + u8 reserved_at_28[0x10]; u8 max_ft_level[0x8]; - u8 reserved_4[0x20]; + u8 reserved_at_40[0x20]; - u8 reserved_5[0x18]; + u8 reserved_at_60[0x18]; u8 log_max_ft_num[0x8]; - u8 reserved_6[0x18]; + u8 reserved_at_80[0x18]; u8 log_max_destination[0x8]; - u8 reserved_7[0x18]; + u8 reserved_at_a0[0x18]; u8 log_max_flow[0x8]; - u8 reserved_8[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support; @@ -298,13 +298,13 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 receive[0x1]; u8 write[0x1]; u8 read[0x1]; - u8 reserved_0[0x1]; + u8 reserved_at_4[0x1]; u8 srq_receive[0x1]; - u8 reserved_1[0x1a]; + u8 reserved_at_6[0x1a]; }; struct mlx5_ifc_ipv4_layout_bits { - u8 reserved_0[0x60]; + u8 reserved_at_0[0x60]; u8 ipv4[0x20]; }; @@ -316,7 +316,7 @@ struct mlx5_ifc_ipv6_layout_bits { union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { struct mlx5_ifc_ipv6_layout_bits ipv6_layout; struct mlx5_ifc_ipv4_layout_bits ipv4_layout; - u8 reserved_0[0x80]; + u8 reserved_at_0[0x80]; }; struct mlx5_ifc_fte_match_set_lyr_2_4_bits { @@ -336,15 +336,15 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 ip_dscp[0x6]; u8 ip_ecn[0x2]; u8 vlan_tag[0x1]; - u8 reserved_0[0x1]; + u8 reserved_at_91[0x1]; u8 frag[0x1]; - u8 reserved_1[0x4]; + u8 reserved_at_93[0x4]; u8 tcp_flags[0x9]; u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_c0[0x20]; u8 udp_sport[0x10]; u8 udp_dport[0x10]; @@ -355,9 +355,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { }; struct mlx5_ifc_fte_match_set_misc_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 source_port[0x10]; u8 outer_second_prio[0x3]; @@ -369,31 +369,31 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 outer_second_vlan_tag[0x1]; u8 inner_second_vlan_tag[0x1]; - u8 reserved_2[0xe]; + u8 reserved_at_62[0xe]; u8 gre_protocol[0x10]; u8 gre_key_h[0x18]; u8 gre_key_l[0x8]; u8 vxlan_vni[0x18]; - u8 reserved_3[0x8]; + u8 reserved_at_b8[0x8]; - u8 reserved_4[0x20]; + u8 reserved_at_c0[0x20]; - u8 reserved_5[0xc]; + u8 reserved_at_e0[0xc]; u8 outer_ipv6_flow_label[0x14]; - u8 reserved_6[0xc]; + u8 reserved_at_100[0xc]; u8 inner_ipv6_flow_label[0x14]; - u8 reserved_7[0xe0]; + u8 reserved_at_120[0xe0]; }; struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; u8 pa_l[0x14]; - u8 reserved_0[0xc]; + u8 reserved_at_34[0xc]; }; struct mlx5_ifc_uint64_bits { @@ -418,31 +418,31 @@ enum { struct mlx5_ifc_ads_bits { u8 fl[0x1]; u8 free_ar[0x1]; - u8 reserved_0[0xe]; + u8 reserved_at_2[0xe]; u8 pkey_index[0x10]; - u8 reserved_1[0x8]; + u8 reserved_at_20[0x8]; u8 grh[0x1]; u8 mlid[0x7]; u8 rlid[0x10]; u8 ack_timeout[0x5]; - u8 reserved_2[0x3]; + u8 reserved_at_45[0x3]; u8 src_addr_index[0x8]; - u8 reserved_3[0x4]; + u8 reserved_at_50[0x4]; u8 stat_rate[0x4]; u8 hop_limit[0x8]; - u8 reserved_4[0x4]; + u8 reserved_at_60[0x4]; u8 tclass[0x8]; u8 flow_label[0x14]; u8 rgid_rip[16][0x8]; - u8 reserved_5[0x4]; + u8 reserved_at_100[0x4]; u8 f_dscp[0x1]; u8 f_ecn[0x1]; - u8 reserved_6[0x1]; + u8 reserved_at_106[0x1]; u8 f_eth_prio[0x1]; u8 ecn[0x2]; u8 dscp[0x6]; @@ -458,25 +458,25 @@ struct mlx5_ifc_ads_bits { }; struct mlx5_ifc_flow_table_nic_cap_bits { - u8 reserved_0[0x200]; + u8 reserved_at_0[0x200]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; - u8 reserved_1[0x200]; + u8 reserved_at_400[0x200]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit; - u8 reserved_2[0x200]; + u8 reserved_at_a00[0x200]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; - u8 reserved_3[0x7200]; + u8 reserved_at_e00[0x7200]; }; struct mlx5_ifc_flow_table_eswitch_cap_bits { - u8 reserved_0[0x200]; + u8 reserved_at_0[0x200]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; @@ -484,7 +484,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; - u8 reserved_1[0x7800]; + u8 reserved_at_800[0x7800]; }; struct mlx5_ifc_e_switch_cap_bits { @@ -493,9 +493,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_0[0x1b]; + u8 reserved_at_5[0x1b]; - u8 reserved_1[0x7e0]; + u8 reserved_at_20[0x7e0]; }; struct mlx5_ifc_per_protocol_networking_offload_caps_bits { @@ -504,51 +504,51 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x3]; + u8 reserved_at_5[0x3]; u8 self_lb_en_modifiable[0x1]; - u8 reserved_1[0x2]; + u8 reserved_at_9[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_2[0x4]; + u8 reserved_at_10[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_3[0x3]; + u8 reserved_at_18[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_4[0x2]; + u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_5[0x20]; + u8 reserved_at_20[0x20]; - u8 reserved_6[0x10]; + u8 reserved_at_40[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_7[0x120]; + u8 reserved_at_60[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_8[0x600]; + u8 reserved_at_200[0x600]; }; struct mlx5_ifc_roce_cap_bits { u8 roce_apm[0x1]; - u8 reserved_0[0x1f]; + u8 reserved_at_1[0x1f]; - u8 reserved_1[0x60]; + u8 reserved_at_20[0x60]; - u8 reserved_2[0xc]; + u8 reserved_at_80[0xc]; u8 l3_type[0x4]; - u8 reserved_3[0x8]; + u8 reserved_at_90[0x8]; u8 roce_version[0x8]; - u8 reserved_4[0x10]; + u8 reserved_at_a0[0x10]; u8 r_roce_dest_udp_port[0x10]; u8 r_roce_max_src_udp_port[0x10]; u8 r_roce_min_src_udp_port[0x10]; - u8 reserved_5[0x10]; + u8 reserved_at_e0[0x10]; u8 roce_address_table_size[0x10]; - u8 reserved_6[0x700]; + u8 reserved_at_100[0x700]; }; enum { @@ -576,35 +576,35 @@ enum { }; struct mlx5_ifc_atomic_caps_bits { - u8 reserved_0[0x40]; + u8 reserved_at_0[0x40]; u8 atomic_req_8B_endianess_mode[0x2]; - u8 reserved_1[0x4]; + u8 reserved_at_42[0x4]; u8 supported_atomic_req_8B_endianess_mode_1[0x1]; - u8 reserved_2[0x19]; + u8 reserved_at_47[0x19]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; - u8 reserved_4[0x10]; + u8 reserved_at_80[0x10]; u8 atomic_operations[0x10]; - u8 reserved_5[0x10]; + u8 reserved_at_a0[0x10]; u8 atomic_size_qp[0x10]; - u8 reserved_6[0x10]; + u8 reserved_at_c0[0x10]; u8 atomic_size_dc[0x10]; - u8 reserved_7[0x720]; + u8 reserved_at_e0[0x720]; }; struct mlx5_ifc_odp_cap_bits { - u8 reserved_0[0x40]; + u8 reserved_at_0[0x40]; u8 sig[0x1]; - u8 reserved_1[0x1f]; + u8 reserved_at_41[0x1f]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps; @@ -612,7 +612,7 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_3[0x720]; + u8 reserved_at_e0[0x720]; }; enum { @@ -660,55 +660,55 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_0[0x80]; + u8 reserved_at_0[0x80]; u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; - u8 reserved_1[0xb]; + u8 reserved_at_90[0xb]; u8 log_max_qp[0x5]; - u8 reserved_2[0xb]; + u8 reserved_at_a0[0xb]; u8 log_max_srq[0x5]; - u8 reserved_3[0x10]; + u8 reserved_at_b0[0x10]; - u8 reserved_4[0x8]; + u8 reserved_at_c0[0x8]; u8 log_max_cq_sz[0x8]; - u8 reserved_5[0xb]; + u8 reserved_at_d0[0xb]; u8 log_max_cq[0x5]; u8 log_max_eq_sz[0x8]; - u8 reserved_6[0x2]; + u8 reserved_at_e8[0x2]; u8 log_max_mkey[0x6]; - u8 reserved_7[0xc]; + u8 reserved_at_f0[0xc]; u8 log_max_eq[0x4]; u8 max_indirection[0x8]; - u8 reserved_8[0x1]; + u8 reserved_at_108[0x1]; u8 log_max_mrw_sz[0x7]; - u8 reserved_9[0x2]; + u8 reserved_at_110[0x2]; u8 log_max_bsf_list_size[0x6]; - u8 reserved_10[0x2]; + u8 reserved_at_118[0x2]; u8 log_max_klm_list_size[0x6]; - u8 reserved_11[0xa]; + u8 reserved_at_120[0xa]; u8 log_max_ra_req_dc[0x6]; - u8 reserved_12[0xa]; + u8 reserved_at_130[0xa]; u8 log_max_ra_res_dc[0x6]; - u8 reserved_13[0xa]; + u8 reserved_at_140[0xa]; u8 log_max_ra_req_qp[0x6]; - u8 reserved_14[0xa]; + u8 reserved_at_150[0xa]; u8 log_max_ra_res_qp[0x6]; u8 pad_cap[0x1]; u8 cc_query_allowed[0x1]; u8 cc_modify_allowed[0x1]; - u8 reserved_15[0xd]; + u8 reserved_at_163[0xd]; u8 gid_table_size[0x10]; u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; - u8 reserved_16[0x4]; + u8 reserved_at_182[0x4]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -716,158 +716,158 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_group_manager[0x1]; u8 ib_virt[0x1]; u8 eth_virt[0x1]; - u8 reserved_17[0x1]; + u8 reserved_at_1a4[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; u8 early_vf_enable; - u8 reserved_18[0x2]; + u8 reserved_at_1a8[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_19[0x6]; + u8 reserved_at_1af[0x6]; u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_20[0x3]; + u8 reserved_at_1bf[0x3]; u8 log_max_msg[0x5]; - u8 reserved_21[0x18]; + u8 reserved_at_1c7[0x18]; u8 stat_rate_support[0x10]; - u8 reserved_22[0xc]; + u8 reserved_at_1ef[0xc]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_23[0xe]; + u8 reserved_at_200[0xe]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_24[0x1]; + u8 reserved_at_212[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_25[0x1]; + u8 reserved_at_215[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_26[0x1]; + u8 reserved_at_21a[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 reserved_27[0x1]; + u8 reserved_at_21e[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_28[0x3]; + u8 reserved_at_222[0x3]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_29[0x1]; + u8 reserved_at_228[0x1]; u8 scqe_break_moderation[0x1]; - u8 reserved_30[0x1]; + u8 reserved_at_22a[0x1]; u8 cd[0x1]; - u8 reserved_31[0x1]; + u8 reserved_at_22c[0x1]; u8 apm[0x1]; - u8 reserved_32[0x7]; + u8 reserved_at_22e[0x7]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_33[0x4]; + u8 reserved_at_237[0x4]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_34[0xa]; + u8 reserved_at_23f[0xa]; u8 uar_sz[0x6]; - u8 reserved_35[0x8]; + u8 reserved_at_24f[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_36[0x1]; + u8 reserved_at_260[0x1]; u8 pad_tx_eth_packet[0x1]; - u8 reserved_37[0x8]; + u8 reserved_at_262[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_38[0x10]; + u8 reserved_at_26f[0x10]; - u8 reserved_39[0x10]; + u8 reserved_at_27f[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_40[0x10]; + u8 reserved_at_29f[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_41[0x10]; + u8 reserved_at_2bf[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_42[0x7]; + u8 reserved_at_2df[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_43[0x18]; + u8 reserved_at_2ff[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_44[0x3]; + u8 reserved_at_31f[0x3]; u8 log_max_transport_domain[0x5]; - u8 reserved_45[0x3]; + u8 reserved_at_327[0x3]; u8 log_max_pd[0x5]; - u8 reserved_46[0xb]; + u8 reserved_at_32f[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_47[0x20]; + u8 reserved_at_33f[0x20]; - u8 reserved_48[0x3]; + u8 reserved_at_35f[0x3]; u8 log_max_rq[0x5]; - u8 reserved_49[0x3]; + u8 reserved_at_367[0x3]; u8 log_max_sq[0x5]; - u8 reserved_50[0x3]; + u8 reserved_at_36f[0x3]; u8 log_max_tir[0x5]; - u8 reserved_51[0x3]; + u8 reserved_at_377[0x3]; u8 log_max_tis[0x5]; u8 basic_cyclic_rcv_wqe[0x1]; - u8 reserved_52[0x2]; + u8 reserved_at_380[0x2]; u8 log_max_rmp[0x5]; - u8 reserved_53[0x3]; + u8 reserved_at_387[0x3]; u8 log_max_rqt[0x5]; - u8 reserved_54[0x3]; + u8 reserved_at_38f[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_55[0x3]; + u8 reserved_at_397[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_56[0x3]; + u8 reserved_at_39f[0x3]; u8 log_max_stride_sz_rq[0x5]; - u8 reserved_57[0x3]; + u8 reserved_at_3a7[0x3]; u8 log_min_stride_sz_rq[0x5]; - u8 reserved_58[0x3]; + u8 reserved_at_3af[0x3]; u8 log_max_stride_sz_sq[0x5]; - u8 reserved_59[0x3]; + u8 reserved_at_3b7[0x3]; u8 log_min_stride_sz_sq[0x5]; - u8 reserved_60[0x1b]; + u8 reserved_at_3bf[0x1b]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_61[0xa]; + u8 reserved_at_3e0[0xa]; u8 log_max_vlan_list[0x5]; - u8 reserved_62[0x3]; + u8 reserved_at_3ef[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_63[0x3]; + u8 reserved_at_3f7[0x3]; u8 log_max_current_uc_list[0x5]; - u8 reserved_64[0x80]; + u8 reserved_at_3ff[0x80]; - u8 reserved_65[0x3]; + u8 reserved_at_47f[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_66[0x8]; + u8 reserved_at_487[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_67[0x20]; + u8 reserved_at_49f[0x20]; u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_68[0x5f]; + u8 reserved_at_4ff[0x5f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_69[0x220]; + u8 reserved_at_57f[0x220]; }; enum mlx5_flow_destination_type { @@ -880,7 +880,7 @@ struct mlx5_ifc_dest_format_struct_bits { u8 destination_type[0x8]; u8 destination_id[0x18]; - u8 reserved_0[0x20]; + u8 reserved_at_20[0x20]; }; struct mlx5_ifc_fte_match_param_bits { @@ -890,7 +890,7 @@ struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers; - u8 reserved_0[0xa00]; + u8 reserved_at_600[0xa00]; }; enum { @@ -922,18 +922,18 @@ struct mlx5_ifc_wq_bits { u8 wq_signature[0x1]; u8 end_padding_mode[0x2]; u8 cd_slave[0x1]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 hds_skip_first_sge[0x1]; u8 log2_hds_buf_size[0x3]; - u8 reserved_1[0x7]; + u8 reserved_at_24[0x7]; u8 page_offset[0x5]; u8 lwm[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 pd[0x18]; - u8 reserved_3[0x8]; + u8 reserved_at_60[0x8]; u8 uar_page[0x18]; u8 dbr_addr[0x40]; @@ -942,60 +942,60 @@ struct mlx5_ifc_wq_bits { u8 sw_counter[0x20]; - u8 reserved_4[0xc]; + u8 reserved_at_100[0xc]; u8 log_wq_stride[0x4]; - u8 reserved_5[0x3]; + u8 reserved_at_110[0x3]; u8 log_wq_pg_sz[0x5]; - u8 reserved_6[0x3]; + u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_7[0x4e0]; + u8 reserved_at_120[0x4e0]; struct mlx5_ifc_cmd_pas_bits pas[0]; }; struct mlx5_ifc_rq_num_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 rq_num[0x18]; }; struct mlx5_ifc_mac_address_layout_bits { - u8 reserved_0[0x10]; + u8 reserved_at_0[0x10]; u8 mac_addr_47_32[0x10]; u8 mac_addr_31_0[0x20]; }; struct mlx5_ifc_vlan_layout_bits { - u8 reserved_0[0x14]; + u8 reserved_at_0[0x14]; u8 vlan[0x0c]; - u8 reserved_1[0x20]; + u8 reserved_at_20[0x20]; }; struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { - u8 reserved_0[0xa0]; + u8 reserved_at_0[0xa0]; u8 min_time_between_cnps[0x20]; - u8 reserved_1[0x12]; + u8 reserved_at_c0[0x12]; u8 cnp_dscp[0x6]; - u8 reserved_2[0x5]; + u8 reserved_at_d8[0x5]; u8 cnp_802p_prio[0x3]; - u8 reserved_3[0x720]; + u8 reserved_at_e0[0x720]; }; struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { - u8 reserved_0[0x60]; + u8 reserved_at_0[0x60]; - u8 reserved_1[0x4]; + u8 reserved_at_60[0x4]; u8 clamp_tgt_rate[0x1]; - u8 reserved_2[0x3]; + u8 reserved_at_65[0x3]; u8 clamp_tgt_rate_after_time_inc[0x1]; - u8 reserved_3[0x17]; + u8 reserved_at_69[0x17]; - u8 reserved_4[0x20]; + u8 reserved_at_80[0x20]; u8 rpg_time_reset[0x20]; @@ -1015,7 +1015,7 @@ struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { u8 rpg_min_rate[0x20]; - u8 reserved_5[0xe0]; + u8 reserved_at_1c0[0xe0]; u8 rate_to_set_on_first_cnp[0x20]; @@ -1025,15 +1025,15 @@ struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { u8 rate_reduce_monitor_period[0x20]; - u8 reserved_6[0x20]; + u8 reserved_at_320[0x20]; u8 initial_alpha_value[0x20]; - u8 reserved_7[0x4a0]; + u8 reserved_at_360[0x4a0]; }; struct mlx5_ifc_cong_control_802_1qau_rp_bits { - u8 reserved_0[0x80]; + u8 reserved_at_0[0x80]; u8 rppp_max_rps[0x20]; @@ -1055,7 +1055,7 @@ struct mlx5_ifc_cong_control_802_1qau_rp_bits { u8 rpg_min_rate[0x20]; - u8 reserved_1[0x640]; + u8 reserved_at_1c0[0x640]; }; enum { @@ -1205,7 +1205,7 @@ struct mlx5_ifc_phys_layer_cntrs_bits { u8 successful_recovery_events[0x20]; - u8 reserved_0[0x180]; + u8 reserved_at_640[0x180]; }; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { @@ -1213,7 +1213,7 @@ struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { u8 transmit_queue_low[0x20]; - u8 reserved_0[0x780]; + u8 reserved_at_40[0x780]; }; struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { @@ -1221,7 +1221,7 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { u8 rx_octets_low[0x20]; - u8 reserved_0[0xc0]; + u8 reserved_at_40[0xc0]; u8 rx_frames_high[0x20]; @@ -1231,7 +1231,7 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { u8 tx_octets_low[0x20]; - u8 reserved_1[0xc0]; + u8 reserved_at_180[0xc0]; u8 tx_frames_high[0x20]; @@ -1257,7 +1257,7 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { u8 rx_pause_transition_low[0x20]; - u8 reserved_2[0x400]; + u8 reserved_at_3c0[0x400]; }; struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { @@ -1265,7 +1265,7 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { u8 port_transmit_wait_low[0x20]; - u8 reserved_0[0x780]; + u8 reserved_at_40[0x780]; }; struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { @@ -1333,7 +1333,7 @@ struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { u8 dot3out_pause_frames_low[0x20]; - u8 reserved_0[0x3c0]; + u8 reserved_at_400[0x3c0]; }; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits { @@ -1421,7 +1421,7 @@ struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits { u8 ether_stats_pkts8192to10239octets_low[0x20]; - u8 reserved_0[0x280]; + u8 reserved_at_540[0x280]; }; struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits { @@ -1477,7 +1477,7 @@ struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits { u8 if_out_broadcast_pkts_low[0x20]; - u8 reserved_0[0x480]; + u8 reserved_at_340[0x480]; }; struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { @@ -1557,54 +1557,54 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 a_pause_mac_ctrl_frames_transmitted_low[0x20]; - u8 reserved_0[0x300]; + u8 reserved_at_4c0[0x300]; }; struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; - u8 reserved_0[0xc0]; + u8 reserved_at_20[0xc0]; }; struct mlx5_ifc_stall_vl_event_bits { - u8 reserved_0[0x18]; + u8 reserved_at_0[0x18]; u8 port_num[0x1]; - u8 reserved_1[0x3]; + u8 reserved_at_19[0x3]; u8 vl[0x4]; - u8 reserved_2[0xa0]; + u8 reserved_at_20[0xa0]; }; struct mlx5_ifc_db_bf_congestion_event_bits { u8 event_subtype[0x8]; - u8 reserved_0[0x8]; + u8 reserved_at_8[0x8]; u8 congestion_level[0x8]; - u8 reserved_1[0x8]; + u8 reserved_at_18[0x8]; - u8 reserved_2[0xa0]; + u8 reserved_at_20[0xa0]; }; struct mlx5_ifc_gpio_event_bits { - u8 reserved_0[0x60]; + u8 reserved_at_0[0x60]; u8 gpio_event_hi[0x20]; u8 gpio_event_lo[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_a0[0x40]; }; struct mlx5_ifc_port_state_change_event_bits { - u8 reserved_0[0x40]; + u8 reserved_at_0[0x40]; u8 port_num[0x4]; - u8 reserved_1[0x1c]; + u8 reserved_at_44[0x1c]; - u8 reserved_2[0x80]; + u8 reserved_at_60[0x80]; }; struct mlx5_ifc_dropped_packet_logged_bits { - u8 reserved_0[0xe0]; + u8 reserved_at_0[0xe0]; }; enum { @@ -1613,15 +1613,15 @@ enum { }; struct mlx5_ifc_cq_error_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 cqn[0x18]; - u8 reserved_1[0x20]; + u8 reserved_at_20[0x20]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 syndrome[0x8]; - u8 reserved_3[0x80]; + u8 reserved_at_60[0x80]; }; struct mlx5_ifc_rdma_page_fault_event_bits { @@ -1629,14 +1629,14 @@ struct mlx5_ifc_rdma_page_fault_event_bits { u8 r_key[0x20]; - u8 reserved_0[0x10]; + u8 reserved_at_40[0x10]; u8 packet_len[0x10]; u8 rdma_op_len[0x20]; u8 rdma_va[0x40]; - u8 reserved_1[0x5]; + u8 reserved_at_c0[0x5]; u8 rdma[0x1]; u8 write[0x1]; u8 requestor[0x1]; @@ -1646,15 +1646,15 @@ struct mlx5_ifc_rdma_page_fault_event_bits { struct mlx5_ifc_wqe_associated_page_fault_event_bits { u8 bytes_committed[0x20]; - u8 reserved_0[0x10]; + u8 reserved_at_20[0x10]; u8 wqe_index[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_40[0x10]; u8 len[0x10]; - u8 reserved_2[0x60]; + u8 reserved_at_60[0x60]; - u8 reserved_3[0x5]; + u8 reserved_at_c0[0x5]; u8 rdma[0x1]; u8 write_read[0x1]; u8 requestor[0x1]; @@ -1662,26 +1662,26 @@ struct mlx5_ifc_wqe_associated_page_fault_event_bits { }; struct mlx5_ifc_qp_events_bits { - u8 reserved_0[0xa0]; + u8 reserved_at_0[0xa0]; u8 type[0x8]; - u8 reserved_1[0x18]; + u8 reserved_at_a8[0x18]; - u8 reserved_2[0x8]; + u8 reserved_at_c0[0x8]; u8 qpn_rqn_sqn[0x18]; }; struct mlx5_ifc_dct_events_bits { - u8 reserved_0[0xc0]; + u8 reserved_at_0[0xc0]; - u8 reserved_1[0x8]; + u8 reserved_at_c0[0x8]; u8 dct_number[0x18]; }; struct mlx5_ifc_comp_event_bits { - u8 reserved_0[0xc0]; + u8 reserved_at_0[0xc0]; - u8 reserved_1[0x8]; + u8 reserved_at_c0[0x8]; u8 cq_number[0x18]; }; @@ -1754,41 +1754,41 @@ enum { struct mlx5_ifc_qpc_bits { u8 state[0x4]; - u8 reserved_0[0x4]; + u8 reserved_at_4[0x4]; u8 st[0x8]; - u8 reserved_1[0x3]; + u8 reserved_at_10[0x3]; u8 pm_state[0x2]; - u8 reserved_2[0x7]; + u8 reserved_at_15[0x7]; u8 end_padding_mode[0x2]; - u8 reserved_3[0x2]; + u8 reserved_at_1e[0x2]; u8 wq_signature[0x1]; u8 block_lb_mc[0x1]; u8 atomic_like_write_en[0x1]; u8 latency_sensitive[0x1]; - u8 reserved_4[0x1]; + u8 reserved_at_24[0x1]; u8 drain_sigerr[0x1]; - u8 reserved_5[0x2]; + u8 reserved_at_26[0x2]; u8 pd[0x18]; u8 mtu[0x3]; u8 log_msg_max[0x5]; - u8 reserved_6[0x1]; + u8 reserved_at_48[0x1]; u8 log_rq_size[0x4]; u8 log_rq_stride[0x3]; u8 no_sq[0x1]; u8 log_sq_size[0x4]; - u8 reserved_7[0x6]; + u8 reserved_at_55[0x6]; u8 rlky[0x1]; - u8 reserved_8[0x4]; + u8 reserved_at_5c[0x4]; u8 counter_set_id[0x8]; u8 uar_page[0x18]; - u8 reserved_9[0x8]; + u8 reserved_at_80[0x8]; u8 user_index[0x18]; - u8 reserved_10[0x3]; + u8 reserved_at_a0[0x3]; u8 log_page_size[0x5]; u8 remote_qpn[0x18]; @@ -1797,66 +1797,66 @@ struct mlx5_ifc_qpc_bits { struct mlx5_ifc_ads_bits secondary_address_path; u8 log_ack_req_freq[0x4]; - u8 reserved_11[0x4]; + u8 reserved_at_384[0x4]; u8 log_sra_max[0x3]; - u8 reserved_12[0x2]; + u8 reserved_at_38b[0x2]; u8 retry_count[0x3]; u8 rnr_retry[0x3]; - u8 reserved_13[0x1]; + u8 reserved_at_393[0x1]; u8 fre[0x1]; u8 cur_rnr_retry[0x3]; u8 cur_retry_count[0x3]; - u8 reserved_14[0x5]; + u8 reserved_at_39b[0x5]; - u8 reserved_15[0x20]; + u8 reserved_at_3a0[0x20]; - u8 reserved_16[0x8]; + u8 reserved_at_3c0[0x8]; u8 next_send_psn[0x18]; - u8 reserved_17[0x8]; + u8 reserved_at_3e0[0x8]; u8 cqn_snd[0x18]; - u8 reserved_18[0x40]; + u8 reserved_at_400[0x40]; - u8 reserved_19[0x8]; + u8 reserved_at_440[0x8]; u8 last_acked_psn[0x18]; - u8 reserved_20[0x8]; + u8 reserved_at_460[0x8]; u8 ssn[0x18]; - u8 reserved_21[0x8]; + u8 reserved_at_480[0x8]; u8 log_rra_max[0x3]; - u8 reserved_22[0x1]; + u8 reserved_at_48b[0x1]; u8 atomic_mode[0x4]; u8 rre[0x1]; u8 rwe[0x1]; u8 rae[0x1]; - u8 reserved_23[0x1]; + u8 reserved_at_493[0x1]; u8 page_offset[0x6]; - u8 reserved_24[0x3]; + u8 reserved_at_49a[0x3]; u8 cd_slave_receive[0x1]; u8 cd_slave_send[0x1]; u8 cd_master[0x1]; - u8 reserved_25[0x3]; + u8 reserved_at_4a0[0x3]; u8 min_rnr_nak[0x5]; u8 next_rcv_psn[0x18]; - u8 reserved_26[0x8]; + u8 reserved_at_4c0[0x8]; u8 xrcd[0x18]; - u8 reserved_27[0x8]; + u8 reserved_at_4e0[0x8]; u8 cqn_rcv[0x18]; u8 dbr_addr[0x40]; u8 q_key[0x20]; - u8 reserved_28[0x5]; + u8 reserved_at_560[0x5]; u8 rq_type[0x3]; u8 srqn_rmpn[0x18]; - u8 reserved_29[0x8]; + u8 reserved_at_580[0x8]; u8 rmsn[0x18]; u8 hw_sq_wqebb_counter[0x10]; @@ -1866,33 +1866,33 @@ struct mlx5_ifc_qpc_bits { u8 sw_rq_counter[0x20]; - u8 reserved_30[0x20]; + u8 reserved_at_600[0x20]; - u8 reserved_31[0xf]; + u8 reserved_at_620[0xf]; u8 cgs[0x1]; u8 cs_req[0x8]; u8 cs_res[0x8]; u8 dc_access_key[0x40]; - u8 reserved_32[0xc0]; + u8 reserved_at_680[0xc0]; }; struct mlx5_ifc_roce_addr_layout_bits { u8 source_l3_address[16][0x8]; - u8 reserved_0[0x3]; + u8 reserved_at_80[0x3]; u8 vlan_valid[0x1]; u8 vlan_id[0xc]; u8 source_mac_47_32[0x10]; u8 source_mac_31_0[0x20]; - u8 reserved_1[0x14]; + u8 reserved_at_c0[0x14]; u8 roce_l3_type[0x4]; u8 roce_version[0x8]; - u8 reserved_2[0x20]; + u8 reserved_at_e0[0x20]; }; union mlx5_ifc_hca_cap_union_bits { @@ -1904,7 +1904,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; - u8 reserved_0[0x8000]; + u8 reserved_at_0[0x8000]; }; enum { @@ -1914,24 +1914,24 @@ enum { }; struct mlx5_ifc_flow_context_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; u8 group_id[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 flow_tag[0x18]; - u8 reserved_2[0x10]; + u8 reserved_at_60[0x10]; u8 action[0x10]; - u8 reserved_3[0x8]; + u8 reserved_at_80[0x8]; u8 destination_list_size[0x18]; - u8 reserved_4[0x160]; + u8 reserved_at_a0[0x160]; struct mlx5_ifc_fte_match_param_bits match_value; - u8 reserved_5[0x600]; + u8 reserved_at_1200[0x600]; struct mlx5_ifc_dest_format_struct_bits destination[0]; }; @@ -1944,43 +1944,43 @@ enum { struct mlx5_ifc_xrc_srqc_bits { u8 state[0x4]; u8 log_xrc_srq_size[0x4]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 reserved_1[0x1]; + u8 reserved_at_22[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; u8 xrcd[0x18]; u8 page_offset[0x6]; - u8 reserved_2[0x2]; + u8 reserved_at_46[0x2]; u8 cqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 user_index_equal_xrc_srqn[0x1]; - u8 reserved_4[0x1]; + u8 reserved_at_81[0x1]; u8 log_page_size[0x6]; u8 user_index[0x18]; - u8 reserved_5[0x20]; + u8 reserved_at_a0[0x20]; - u8 reserved_6[0x8]; + u8 reserved_at_c0[0x8]; u8 pd[0x18]; u8 lwm[0x10]; u8 wqe_cnt[0x10]; - u8 reserved_7[0x40]; + u8 reserved_at_100[0x40]; u8 db_record_addr_h[0x20]; u8 db_record_addr_l[0x1e]; - u8 reserved_8[0x2]; + u8 reserved_at_17e[0x2]; - u8 reserved_9[0x80]; + u8 reserved_at_180[0x80]; }; struct mlx5_ifc_traffic_counter_bits { @@ -1990,16 +1990,16 @@ struct mlx5_ifc_traffic_counter_bits { }; struct mlx5_ifc_tisc_bits { - u8 reserved_0[0xc]; + u8 reserved_at_0[0xc]; u8 prio[0x4]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x100]; + u8 reserved_at_20[0x100]; - u8 reserved_3[0x8]; + u8 reserved_at_120[0x8]; u8 transport_domain[0x18]; - u8 reserved_4[0x3c0]; + u8 reserved_at_140[0x3c0]; }; enum { @@ -2024,31 +2024,31 @@ enum { }; struct mlx5_ifc_tirc_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; u8 disp_type[0x4]; - u8 reserved_1[0x1c]; + u8 reserved_at_24[0x1c]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; - u8 reserved_3[0x4]; + u8 reserved_at_80[0x4]; u8 lro_timeout_period_usecs[0x10]; u8 lro_enable_mask[0x4]; u8 lro_max_ip_payload_size[0x8]; - u8 reserved_4[0x40]; + u8 reserved_at_a0[0x40]; - u8 reserved_5[0x8]; + u8 reserved_at_e0[0x8]; u8 inline_rqn[0x18]; u8 rx_hash_symmetric[0x1]; - u8 reserved_6[0x1]; + u8 reserved_at_101[0x1]; u8 tunneled_offload_en[0x1]; - u8 reserved_7[0x5]; + u8 reserved_at_103[0x5]; u8 indirect_table[0x18]; u8 rx_hash_fn[0x4]; - u8 reserved_8[0x2]; + u8 reserved_at_124[0x2]; u8 self_lb_block[0x2]; u8 transport_domain[0x18]; @@ -2058,7 +2058,7 @@ struct mlx5_ifc_tirc_bits { struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner; - u8 reserved_9[0x4c0]; + u8 reserved_at_2c0[0x4c0]; }; enum { @@ -2069,39 +2069,39 @@ enum { struct mlx5_ifc_srqc_bits { u8 state[0x4]; u8 log_srq_size[0x4]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 reserved_1[0x1]; + u8 reserved_at_22[0x1]; u8 rlky[0x1]; - u8 reserved_2[0x1]; + u8 reserved_at_24[0x1]; u8 log_rq_stride[0x3]; u8 xrcd[0x18]; u8 page_offset[0x6]; - u8 reserved_3[0x2]; + u8 reserved_at_46[0x2]; u8 cqn[0x18]; - u8 reserved_4[0x20]; + u8 reserved_at_60[0x20]; - u8 reserved_5[0x2]; + u8 reserved_at_80[0x2]; u8 log_page_size[0x6]; - u8 reserved_6[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_7[0x20]; + u8 reserved_at_a0[0x20]; - u8 reserved_8[0x8]; + u8 reserved_at_c0[0x8]; u8 pd[0x18]; u8 lwm[0x10]; u8 wqe_cnt[0x10]; - u8 reserved_9[0x40]; + u8 reserved_at_100[0x40]; u8 dbr_addr[0x40]; - u8 reserved_10[0x80]; + u8 reserved_at_180[0x80]; }; enum { @@ -2115,39 +2115,39 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_0[0x4]; + u8 reserved_at_4[0x4]; u8 state[0x4]; - u8 reserved_1[0x14]; + u8 reserved_at_c[0x14]; - u8 reserved_2[0x8]; + u8 reserved_at_20[0x8]; u8 user_index[0x18]; - u8 reserved_3[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_4[0xa0]; + u8 reserved_at_60[0xa0]; u8 tis_lst_sz[0x10]; - u8 reserved_5[0x10]; + u8 reserved_at_110[0x10]; - u8 reserved_6[0x40]; + u8 reserved_at_120[0x40]; - u8 reserved_7[0x8]; + u8 reserved_at_160[0x8]; u8 tis_num_0[0x18]; struct mlx5_ifc_wq_bits wq; }; struct mlx5_ifc_rqtc_bits { - u8 reserved_0[0xa0]; + u8 reserved_at_0[0xa0]; - u8 reserved_1[0x10]; + u8 reserved_at_a0[0x10]; u8 rqt_max_size[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_c0[0x10]; u8 rqt_actual_size[0x10]; - u8 reserved_3[0x6a0]; + u8 reserved_at_e0[0x6a0]; struct mlx5_ifc_rq_num_bits rq_num[0]; }; @@ -2165,27 +2165,27 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_0[0x2]; + u8 reserved_at_1[0x2]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; u8 state[0x4]; - u8 reserved_1[0x1]; + u8 reserved_at_c[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_2[0x12]; + u8 reserved_at_e[0x12]; - u8 reserved_3[0x8]; + u8 reserved_at_20[0x8]; u8 user_index[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; u8 counter_set_id[0x8]; - u8 reserved_5[0x18]; + u8 reserved_at_68[0x18]; - u8 reserved_6[0x8]; + u8 reserved_at_80[0x8]; u8 rmpn[0x18]; - u8 reserved_7[0xe0]; + u8 reserved_at_a0[0xe0]; struct mlx5_ifc_wq_bits wq; }; @@ -2196,31 +2196,31 @@ enum { }; struct mlx5_ifc_rmpc_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 state[0x4]; - u8 reserved_1[0x14]; + u8 reserved_at_c[0x14]; u8 basic_cyclic_rcv_wqe[0x1]; - u8 reserved_2[0x1f]; + u8 reserved_at_21[0x1f]; - u8 reserved_3[0x140]; + u8 reserved_at_40[0x140]; struct mlx5_ifc_wq_bits wq; }; struct mlx5_ifc_nic_vport_context_bits { - u8 reserved_0[0x1f]; + u8 reserved_at_0[0x1f]; u8 roce_en[0x1]; u8 arm_change_event[0x1]; - u8 reserved_1[0x1a]; + u8 reserved_at_21[0x1a]; u8 event_on_mtu[0x1]; u8 event_on_promisc_change[0x1]; u8 event_on_vlan_change[0x1]; u8 event_on_mc_address_change[0x1]; u8 event_on_uc_address_change[0x1]; - u8 reserved_2[0xf0]; + u8 reserved_at_40[0xf0]; u8 mtu[0x10]; @@ -2228,21 +2228,21 @@ struct mlx5_ifc_nic_vport_context_bits { u8 port_guid[0x40]; u8 node_guid[0x40]; - u8 reserved_3[0x140]; + u8 reserved_at_200[0x140]; u8 qkey_violation_counter[0x10]; - u8 reserved_4[0x430]; + u8 reserved_at_350[0x430]; u8 promisc_uc[0x1]; u8 promisc_mc[0x1]; u8 promisc_all[0x1]; - u8 reserved_5[0x2]; + u8 reserved_at_783[0x2]; u8 allowed_list_type[0x3]; - u8 reserved_6[0xc]; + u8 reserved_at_788[0xc]; u8 allowed_list_size[0xc]; struct mlx5_ifc_mac_address_layout_bits permanent_address; - u8 reserved_7[0x20]; + u8 reserved_at_7e0[0x20]; u8 current_uc_mac_address[0][0x40]; }; @@ -2254,9 +2254,9 @@ enum { }; struct mlx5_ifc_mkc_bits { - u8 reserved_0[0x1]; + u8 reserved_at_0[0x1]; u8 free[0x1]; - u8 reserved_1[0xd]; + u8 reserved_at_2[0xd]; u8 small_fence_on_rdma_read_response[0x1]; u8 umr_en[0x1]; u8 a[0x1]; @@ -2265,19 +2265,19 @@ struct mlx5_ifc_mkc_bits { u8 lw[0x1]; u8 lr[0x1]; u8 access_mode[0x2]; - u8 reserved_2[0x8]; + u8 reserved_at_18[0x8]; u8 qpn[0x18]; u8 mkey_7_0[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_40[0x20]; u8 length64[0x1]; u8 bsf_en[0x1]; u8 sync_umr[0x1]; - u8 reserved_4[0x2]; + u8 reserved_at_63[0x2]; u8 expected_sigerr_count[0x1]; - u8 reserved_5[0x1]; + u8 reserved_at_66[0x1]; u8 en_rinval[0x1]; u8 pd[0x18]; @@ -2287,18 +2287,18 @@ struct mlx5_ifc_mkc_bits { u8 bsf_octword_size[0x20]; - u8 reserved_6[0x80]; + u8 reserved_at_120[0x80]; u8 translations_octword_size[0x20]; - u8 reserved_7[0x1b]; + u8 reserved_at_1c0[0x1b]; u8 log_page_size[0x5]; - u8 reserved_8[0x20]; + u8 reserved_at_1e0[0x20]; }; struct mlx5_ifc_pkey_bits { - u8 reserved_0[0x10]; + u8 reserved_at_0[0x10]; u8 pkey[0x10]; }; @@ -2309,19 +2309,19 @@ struct mlx5_ifc_array128_auto_bits { struct mlx5_ifc_hca_vport_context_bits { u8 field_select[0x20]; - u8 reserved_0[0xe0]; + u8 reserved_at_20[0xe0]; u8 sm_virt_aware[0x1]; u8 has_smi[0x1]; u8 has_raw[0x1]; u8 grh_required[0x1]; - u8 reserved_1[0xc]; + u8 reserved_at_104[0xc]; u8 port_physical_state[0x4]; u8 vport_state_policy[0x4]; u8 port_state[0x4]; u8 vport_state[0x4]; - u8 reserved_2[0x20]; + u8 reserved_at_120[0x20]; u8 system_image_guid[0x40]; @@ -2337,33 +2337,33 @@ struct mlx5_ifc_hca_vport_context_bits { u8 cap_mask2_field_select[0x20]; - u8 reserved_3[0x80]; + u8 reserved_at_280[0x80]; u8 lid[0x10]; - u8 reserved_4[0x4]; + u8 reserved_at_310[0x4]; u8 init_type_reply[0x4]; u8 lmc[0x3]; u8 subnet_timeout[0x5]; u8 sm_lid[0x10]; u8 sm_sl[0x4]; - u8 reserved_5[0xc]; + u8 reserved_at_334[0xc]; u8 qkey_violation_counter[0x10]; u8 pkey_violation_counter[0x10]; - u8 reserved_6[0xca0]; + u8 reserved_at_360[0xca0]; }; struct mlx5_ifc_esw_vport_context_bits { - u8 reserved_0[0x3]; + u8 reserved_at_0[0x3]; u8 vport_svlan_strip[0x1]; u8 vport_cvlan_strip[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert[0x2]; - u8 reserved_1[0x18]; + u8 reserved_at_8[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_20[0x20]; u8 svlan_cfi[0x1]; u8 svlan_pcp[0x3]; @@ -2372,7 +2372,7 @@ struct mlx5_ifc_esw_vport_context_bits { u8 cvlan_pcp[0x3]; u8 cvlan_id[0xc]; - u8 reserved_3[0x7a0]; + u8 reserved_at_60[0x7a0]; }; enum { @@ -2387,41 +2387,41 @@ enum { struct mlx5_ifc_eqc_bits { u8 status[0x4]; - u8 reserved_0[0x9]; + u8 reserved_at_4[0x9]; u8 ec[0x1]; u8 oi[0x1]; - u8 reserved_1[0x5]; + u8 reserved_at_f[0x5]; u8 st[0x4]; - u8 reserved_2[0x8]; + u8 reserved_at_18[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_20[0x20]; - u8 reserved_4[0x14]; + u8 reserved_at_40[0x14]; u8 page_offset[0x6]; - u8 reserved_5[0x6]; + u8 reserved_at_5a[0x6]; - u8 reserved_6[0x3]; + u8 reserved_at_60[0x3]; u8 log_eq_size[0x5]; u8 uar_page[0x18]; - u8 reserved_7[0x20]; + u8 reserved_at_80[0x20]; - u8 reserved_8[0x18]; + u8 reserved_at_a0[0x18]; u8 intr[0x8]; - u8 reserved_9[0x3]; + u8 reserved_at_c0[0x3]; u8 log_page_size[0x5]; - u8 reserved_10[0x18]; + u8 reserved_at_c8[0x18]; - u8 reserved_11[0x60]; + u8 reserved_at_e0[0x60]; - u8 reserved_12[0x8]; + u8 reserved_at_140[0x8]; u8 consumer_counter[0x18]; - u8 reserved_13[0x8]; + u8 reserved_at_160[0x8]; u8 producer_counter[0x18]; - u8 reserved_14[0x80]; + u8 reserved_at_180[0x80]; }; enum { @@ -2445,14 +2445,14 @@ enum { }; struct mlx5_ifc_dctc_bits { - u8 reserved_0[0x4]; + u8 reserved_at_0[0x4]; u8 state[0x4]; - u8 reserved_1[0x18]; + u8 reserved_at_8[0x18]; - u8 reserved_2[0x8]; + u8 reserved_at_20[0x8]; u8 user_index[0x18]; - u8 reserved_3[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; u8 counter_set_id[0x8]; @@ -2464,45 +2464,45 @@ struct mlx5_ifc_dctc_bits { u8 latency_sensitive[0x1]; u8 rlky[0x1]; u8 free_ar[0x1]; - u8 reserved_4[0xd]; + u8 reserved_at_73[0xd]; - u8 reserved_5[0x8]; + u8 reserved_at_80[0x8]; u8 cs_res[0x8]; - u8 reserved_6[0x3]; + u8 reserved_at_90[0x3]; u8 min_rnr_nak[0x5]; - u8 reserved_7[0x8]; + u8 reserved_at_98[0x8]; - u8 reserved_8[0x8]; + u8 reserved_at_a0[0x8]; u8 srqn[0x18]; - u8 reserved_9[0x8]; + u8 reserved_at_c0[0x8]; u8 pd[0x18]; u8 tclass[0x8]; - u8 reserved_10[0x4]; + u8 reserved_at_e8[0x4]; u8 flow_label[0x14]; u8 dc_access_key[0x40]; - u8 reserved_11[0x5]; + u8 reserved_at_140[0x5]; u8 mtu[0x3]; u8 port[0x8]; u8 pkey_index[0x10]; - u8 reserved_12[0x8]; + u8 reserved_at_160[0x8]; u8 my_addr_index[0x8]; - u8 reserved_13[0x8]; + u8 reserved_at_170[0x8]; u8 hop_limit[0x8]; u8 dc_access_key_violation_count[0x20]; - u8 reserved_14[0x14]; + u8 reserved_at_1a0[0x14]; u8 dei_cfi[0x1]; u8 eth_prio[0x3]; u8 ecn[0x2]; u8 dscp[0x6]; - u8 reserved_15[0x40]; + u8 reserved_at_1c0[0x40]; }; enum { @@ -2524,54 +2524,54 @@ enum { struct mlx5_ifc_cqc_bits { u8 status[0x4]; - u8 reserved_0[0x4]; + u8 reserved_at_4[0x4]; u8 cqe_sz[0x3]; u8 cc[0x1]; - u8 reserved_1[0x1]; + u8 reserved_at_c[0x1]; u8 scqe_break_moderation_en[0x1]; u8 oi[0x1]; - u8 reserved_2[0x2]; + u8 reserved_at_f[0x2]; u8 cqe_zip_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; - u8 reserved_3[0x8]; + u8 reserved_at_18[0x8]; - u8 reserved_4[0x20]; + u8 reserved_at_20[0x20]; - u8 reserved_5[0x14]; + u8 reserved_at_40[0x14]; u8 page_offset[0x6]; - u8 reserved_6[0x6]; + u8 reserved_at_5a[0x6]; - u8 reserved_7[0x3]; + u8 reserved_at_60[0x3]; u8 log_cq_size[0x5]; u8 uar_page[0x18]; - u8 reserved_8[0x4]; + u8 reserved_at_80[0x4]; u8 cq_period[0xc]; u8 cq_max_count[0x10]; - u8 reserved_9[0x18]; + u8 reserved_at_a0[0x18]; u8 c_eqn[0x8]; - u8 reserved_10[0x3]; + u8 reserved_at_c0[0x3]; u8 log_page_size[0x5]; - u8 reserved_11[0x18]; + u8 reserved_at_c8[0x18]; - u8 reserved_12[0x20]; + u8 reserved_at_e0[0x20]; - u8 reserved_13[0x8]; + u8 reserved_at_100[0x8]; u8 last_notified_index[0x18]; - u8 reserved_14[0x8]; + u8 reserved_at_120[0x8]; u8 last_solicit_index[0x18]; - u8 reserved_15[0x8]; + u8 reserved_at_140[0x8]; u8 consumer_counter[0x18]; - u8 reserved_16[0x8]; + u8 reserved_at_160[0x8]; u8 producer_counter[0x18]; - u8 reserved_17[0x40]; + u8 reserved_at_180[0x40]; u8 dbr_addr[0x40]; }; @@ -2580,16 +2580,16 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits { struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; - u8 reserved_0[0x800]; + u8 reserved_at_0[0x800]; }; struct mlx5_ifc_query_adapter_param_block_bits { - u8 reserved_0[0xc0]; + u8 reserved_at_0[0xc0]; - u8 reserved_1[0x8]; + u8 reserved_at_c0[0x8]; u8 ieee_vendor_id[0x18]; - u8 reserved_2[0x10]; + u8 reserved_at_e0[0x10]; u8 vsd_vendor_id[0x10]; u8 vsd[208][0x8]; @@ -2600,14 +2600,14 @@ struct mlx5_ifc_query_adapter_param_block_bits { union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { struct mlx5_ifc_modify_field_select_bits modify_field_select; struct mlx5_ifc_resize_field_select_bits resize_field_select; - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; }; union mlx5_ifc_field_select_802_1_r_roce_auto_bits { struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp; struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp; struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np; - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; }; union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { @@ -2619,7 +2619,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; - u8 reserved_0[0x7c0]; + u8 reserved_at_0[0x7c0]; }; union mlx5_ifc_event_auto_bits { @@ -2635,23 +2635,23 @@ union mlx5_ifc_event_auto_bits { struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event; struct mlx5_ifc_stall_vl_event_bits stall_vl_event; struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event; - u8 reserved_0[0xe0]; + u8 reserved_at_0[0xe0]; }; struct mlx5_ifc_health_buffer_bits { - u8 reserved_0[0x100]; + u8 reserved_at_0[0x100]; u8 assert_existptr[0x20]; u8 assert_callra[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_140[0x40]; u8 fw_version[0x20]; u8 hw_id[0x20]; - u8 reserved_2[0x20]; + u8 reserved_at_1c0[0x20]; u8 irisc_index[0x8]; u8 synd[0x8]; @@ -2660,20 +2660,20 @@ struct mlx5_ifc_health_buffer_bits { struct mlx5_ifc_register_loopback_control_bits { u8 no_lb[0x1]; - u8 reserved_0[0x7]; + u8 reserved_at_1[0x7]; u8 port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x60]; + u8 reserved_at_20[0x60]; }; struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; enum { @@ -2683,108 +2683,108 @@ enum { struct mlx5_ifc_teardown_hca_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 profile[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_sqerr2rts_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_sqerr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_sqd2rts_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_set_roce_address_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_roce_address_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 roce_address_index[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_50[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_roce_addr_layout_bits roce_address; }; struct mlx5_ifc_set_mad_demux_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; enum { @@ -2794,89 +2794,89 @@ enum { struct mlx5_ifc_set_mad_demux_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_40[0x20]; - u8 reserved_3[0x6]; + u8 reserved_at_60[0x6]; u8 demux_mode[0x2]; - u8 reserved_4[0x18]; + u8 reserved_at_68[0x18]; }; struct mlx5_ifc_set_l2_table_entry_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_l2_table_entry_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x60]; + u8 reserved_at_40[0x60]; - u8 reserved_3[0x8]; + u8 reserved_at_a0[0x8]; u8 table_index[0x18]; - u8 reserved_4[0x20]; + u8 reserved_at_c0[0x20]; - u8 reserved_5[0x13]; + u8 reserved_at_e0[0x13]; u8 vlan_valid[0x1]; u8 vlan[0xc]; struct mlx5_ifc_mac_address_layout_bits mac_address; - u8 reserved_6[0xc0]; + u8 reserved_at_140[0xc0]; }; struct mlx5_ifc_set_issi_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_issi_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 current_issi[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_set_hca_cap_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_hca_cap_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; union mlx5_ifc_hca_cap_union_bits capability; }; @@ -2890,156 +2890,156 @@ enum { struct mlx5_ifc_set_fte_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_fte_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x18]; + u8 reserved_at_c0[0x18]; u8 modify_enable_mask[0x8]; - u8 reserved_6[0x20]; + u8 reserved_at_e0[0x20]; u8 flow_index[0x20]; - u8 reserved_7[0xe0]; + u8 reserved_at_120[0xe0]; struct mlx5_ifc_flow_context_bits flow_context; }; struct mlx5_ifc_rts2rts_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_rts2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_rtr2rts_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_rst2init_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_rst2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_query_xrc_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_2[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 xrc_srqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; enum { @@ -3049,13 +3049,13 @@ enum { struct mlx5_ifc_query_vport_state_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_40[0x20]; - u8 reserved_2[0x18]; + u8 reserved_at_60[0x18]; u8 admin_state[0x4]; u8 state[0x4]; }; @@ -3067,25 +3067,25 @@ enum { struct mlx5_ifc_query_vport_state_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_vport_counter_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_traffic_counter_bits received_errors; @@ -3111,7 +3111,7 @@ struct mlx5_ifc_query_vport_counter_out_bits { struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast; - u8 reserved_2[0xa00]; + u8 reserved_at_680[0xa00]; }; enum { @@ -3120,328 +3120,328 @@ enum { struct mlx5_ifc_query_vport_counter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_3[0x60]; + u8 reserved_at_60[0x60]; u8 clear[0x1]; - u8 reserved_4[0x1f]; + u8 reserved_at_c1[0x1f]; - u8 reserved_5[0x20]; + u8 reserved_at_e0[0x20]; }; struct mlx5_ifc_query_tis_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_tisc_bits tis_context; }; struct mlx5_ifc_query_tis_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 tisn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_tir_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_tirc_bits tir_context; }; struct mlx5_ifc_query_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 tirn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_srqc_bits srq_context_entry; - u8 reserved_2[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 srqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_sq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_sqc_bits sq_context; }; struct mlx5_ifc_query_sq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 sqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_special_contexts_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_40[0x20]; u8 resd_lkey[0x20]; }; struct mlx5_ifc_query_special_contexts_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_query_rqt_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_rqtc_bits rqt_context; }; struct mlx5_ifc_query_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rqtn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_rq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_rqc_bits rq_context; }; struct mlx5_ifc_query_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_roce_address_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_roce_addr_layout_bits roce_address; }; struct mlx5_ifc_query_roce_address_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 roce_address_index[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_50[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_rmp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_rmpc_bits rmp_context; }; struct mlx5_ifc_query_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rmpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 opt_param_mask[0x20]; - u8 reserved_2[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_3[0x80]; + u8 reserved_at_800[0x80]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_q_counter_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 rx_write_requests[0x20]; - u8 reserved_2[0x20]; + u8 reserved_at_a0[0x20]; u8 rx_read_requests[0x20]; - u8 reserved_3[0x20]; + u8 reserved_at_e0[0x20]; u8 rx_atomic_requests[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_120[0x20]; u8 rx_dct_connect[0x20]; - u8 reserved_5[0x20]; + u8 reserved_at_160[0x20]; u8 out_of_buffer[0x20]; - u8 reserved_6[0x20]; + u8 reserved_at_1a0[0x20]; u8 out_of_sequence[0x20]; - u8 reserved_7[0x620]; + u8 reserved_at_1e0[0x620]; }; struct mlx5_ifc_query_q_counter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x80]; + u8 reserved_at_40[0x80]; u8 clear[0x1]; - u8 reserved_3[0x1f]; + u8 reserved_at_c1[0x1f]; - u8 reserved_4[0x18]; + u8 reserved_at_e0[0x18]; u8 counter_set_id[0x8]; }; struct mlx5_ifc_query_pages_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x10]; + u8 reserved_at_40[0x10]; u8 function_id[0x10]; u8 num_pages[0x20]; @@ -3455,55 +3455,55 @@ enum { struct mlx5_ifc_query_pages_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 function_id[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_nic_vport_context_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_nic_vport_context_bits nic_vport_context; }; struct mlx5_ifc_query_nic_vport_context_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_3[0x5]; + u8 reserved_at_60[0x5]; u8 allowed_list_type[0x3]; - u8 reserved_4[0x18]; + u8 reserved_at_68[0x18]; }; struct mlx5_ifc_query_mkey_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; - u8 reserved_2[0x600]; + u8 reserved_at_280[0x600]; u8 bsf0_klm0_pas_mtt0_1[16][0x8]; @@ -3512,265 +3512,265 @@ struct mlx5_ifc_query_mkey_out_bits { struct mlx5_ifc_query_mkey_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 mkey_index[0x18]; u8 pg_access[0x1]; - u8 reserved_3[0x1f]; + u8 reserved_at_61[0x1f]; }; struct mlx5_ifc_query_mad_demux_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 mad_dumux_parameters_block[0x20]; }; struct mlx5_ifc_query_mad_demux_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_query_l2_table_entry_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xa0]; + u8 reserved_at_40[0xa0]; - u8 reserved_2[0x13]; + u8 reserved_at_e0[0x13]; u8 vlan_valid[0x1]; u8 vlan[0xc]; struct mlx5_ifc_mac_address_layout_bits mac_address; - u8 reserved_3[0xc0]; + u8 reserved_at_140[0xc0]; }; struct mlx5_ifc_query_l2_table_entry_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x60]; + u8 reserved_at_40[0x60]; - u8 reserved_3[0x8]; + u8 reserved_at_a0[0x8]; u8 table_index[0x18]; - u8 reserved_4[0x140]; + u8 reserved_at_c0[0x140]; }; struct mlx5_ifc_query_issi_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x10]; + u8 reserved_at_40[0x10]; u8 current_issi[0x10]; - u8 reserved_2[0xa0]; + u8 reserved_at_60[0xa0]; - u8 supported_issi_reserved[76][0x8]; + u8 reserved_at_100[76][0x8]; u8 supported_issi_dw0[0x20]; }; struct mlx5_ifc_query_issi_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_query_hca_vport_pkey_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_pkey_bits pkey[0]; }; struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xb]; + u8 reserved_at_41[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 pkey_index[0x10]; }; struct mlx5_ifc_query_hca_vport_gid_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_40[0x20]; u8 gids_num[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_70[0x10]; struct mlx5_ifc_array128_auto_bits gid[0]; }; struct mlx5_ifc_query_hca_vport_gid_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xb]; + u8 reserved_at_41[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 gid_index[0x10]; }; struct mlx5_ifc_query_hca_vport_context_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_hca_vport_context_bits hca_vport_context; }; struct mlx5_ifc_query_hca_vport_context_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xb]; + u8 reserved_at_41[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_hca_cap_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; union mlx5_ifc_hca_cap_union_bits capability; }; struct mlx5_ifc_query_hca_cap_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_query_flow_table_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x80]; + u8 reserved_at_40[0x80]; - u8 reserved_2[0x8]; + u8 reserved_at_c0[0x8]; u8 level[0x8]; - u8 reserved_3[0x8]; + u8 reserved_at_d0[0x8]; u8 log_size[0x8]; - u8 reserved_4[0x120]; + u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_query_flow_table_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x140]; + u8 reserved_at_c0[0x140]; }; struct mlx5_ifc_query_fte_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x1c0]; + u8 reserved_at_40[0x1c0]; struct mlx5_ifc_flow_context_bits flow_context; }; struct mlx5_ifc_query_fte_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x40]; + u8 reserved_at_c0[0x40]; u8 flow_index[0x20]; - u8 reserved_6[0xe0]; + u8 reserved_at_120[0xe0]; }; enum { @@ -3781,84 +3781,84 @@ enum { struct mlx5_ifc_query_flow_group_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0xa0]; + u8 reserved_at_40[0xa0]; u8 start_flow_index[0x20]; - u8 reserved_2[0x20]; + u8 reserved_at_100[0x20]; u8 end_flow_index[0x20]; - u8 reserved_3[0xa0]; + u8 reserved_at_140[0xa0]; - u8 reserved_4[0x18]; + u8 reserved_at_1e0[0x18]; u8 match_criteria_enable[0x8]; struct mlx5_ifc_fte_match_param_bits match_criteria; - u8 reserved_5[0xe00]; + u8 reserved_at_1200[0xe00]; }; struct mlx5_ifc_query_flow_group_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; u8 group_id[0x20]; - u8 reserved_5[0x120]; + u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_query_esw_vport_context_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_esw_vport_context_bits esw_vport_context; }; struct mlx5_ifc_query_esw_vport_context_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_modify_esw_vport_context_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_esw_vport_context_fields_select_bits { - u8 reserved[0x1c]; + u8 reserved_at_0[0x1c]; u8 vport_cvlan_insert[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_strip[0x1]; @@ -3867,13 +3867,13 @@ struct mlx5_ifc_esw_vport_context_fields_select_bits { struct mlx5_ifc_modify_esw_vport_context_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; struct mlx5_ifc_esw_vport_context_fields_select_bits field_select; @@ -3883,124 +3883,124 @@ struct mlx5_ifc_modify_esw_vport_context_in_bits { struct mlx5_ifc_query_eq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_eqc_bits eq_context_entry; - u8 reserved_2[0x40]; + u8 reserved_at_280[0x40]; u8 event_bitmask[0x40]; - u8 reserved_3[0x580]; + u8 reserved_at_300[0x580]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_eq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 eq_number[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_dctc_bits dct_context_entry; - u8 reserved_2[0x180]; + u8 reserved_at_280[0x180]; }; struct mlx5_ifc_query_dct_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 dctn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_cq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_2[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_cong_status_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_40[0x20]; u8 enable[0x1]; u8 tag_enable[0x1]; - u8 reserved_2[0x1e]; + u8 reserved_at_62[0x1e]; }; struct mlx5_ifc_query_cong_status_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 priority[0x4]; u8 cong_protocol[0x4]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_cong_statistics_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 cur_flows[0x20]; @@ -4014,7 +4014,7 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 cnp_handled_low[0x20]; - u8 reserved_2[0x100]; + u8 reserved_at_140[0x100]; u8 time_stamp_high[0x20]; @@ -4030,453 +4030,453 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 cnps_sent_low[0x20]; - u8 reserved_3[0x560]; + u8 reserved_at_320[0x560]; }; struct mlx5_ifc_query_cong_statistics_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 clear[0x1]; - u8 reserved_2[0x1f]; + u8 reserved_at_41[0x1f]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_cong_params_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; }; struct mlx5_ifc_query_cong_params_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x1c]; + u8 reserved_at_40[0x1c]; u8 cong_protocol[0x4]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_adapter_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct; }; struct mlx5_ifc_query_adapter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_qp_2rst_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_qp_2rst_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_qp_2err_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_qp_2err_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_page_fault_resume_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_page_fault_resume_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 error[0x1]; - u8 reserved_2[0x4]; + u8 reserved_at_41[0x4]; u8 rdma[0x1]; u8 read_write[0x1]; u8 req_res[0x1]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_nop_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_nop_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_vport_state_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_vport_state_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_3[0x18]; + u8 reserved_at_60[0x18]; u8 admin_state[0x4]; - u8 reserved_4[0x4]; + u8 reserved_at_7c[0x4]; }; struct mlx5_ifc_modify_tis_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_tis_bitmask_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; - u8 reserved_1[0x1f]; + u8 reserved_at_20[0x1f]; u8 prio[0x1]; }; struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 tisn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_modify_tis_bitmask_bits bitmask; - u8 reserved_4[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_tisc_bits ctx; }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; - u8 reserved_1[0x1b]; + u8 reserved_at_20[0x1b]; u8 self_lb_en[0x1]; - u8 reserved_2[0x3]; + u8 reserved_at_3c[0x3]; u8 lro[0x1]; }; struct mlx5_ifc_modify_tir_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 tirn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_modify_tir_bitmask_bits bitmask; - u8 reserved_4[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_tirc_bits ctx; }; struct mlx5_ifc_modify_sq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 sq_state[0x4]; - u8 reserved_2[0x4]; + u8 reserved_at_44[0x4]; u8 sqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 modify_bitmask[0x40]; - u8 reserved_4[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_sqc_bits ctx; }; struct mlx5_ifc_modify_rqt_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_rqt_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_at_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_at_20[0x1f]; u8 rqn_list[0x1]; }; struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rqtn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_rqt_bitmask_bits bitmask; - u8 reserved_4[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_rqtc_bits ctx; }; struct mlx5_ifc_modify_rq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 rq_state[0x4]; - u8 reserved_2[0x4]; + u8 reserved_at_44[0x4]; u8 rqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 modify_bitmask[0x40]; - u8 reserved_4[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_rqc_bits ctx; }; struct mlx5_ifc_modify_rmp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_rmp_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_at_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_at_20[0x1f]; u8 lwm[0x1]; }; struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 rmp_state[0x4]; - u8 reserved_2[0x4]; + u8 reserved_at_44[0x4]; u8 rmpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_rmp_bitmask_bits bitmask; - u8 reserved_4[0x40]; + u8 reserved_at_c0[0x40]; struct mlx5_ifc_rmpc_bits ctx; }; struct mlx5_ifc_modify_nic_vport_context_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_0[0x19]; + u8 reserved_at_0[0x19]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; u8 permanent_address[0x1]; u8 addresses_list[0x1]; u8 roce_en[0x1]; - u8 reserved_1[0x1]; + u8 reserved_at_1f[0x1]; }; struct mlx5_ifc_modify_nic_vport_context_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_at_41[0xf]; u8 vport_number[0x10]; struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; - u8 reserved_3[0x780]; + u8 reserved_at_80[0x780]; struct mlx5_ifc_nic_vport_context_bits nic_vport_context; }; struct mlx5_ifc_modify_hca_vport_context_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_hca_vport_context_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xb]; + u8 reserved_at_41[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; struct mlx5_ifc_hca_vport_context_bits hca_vport_context; }; struct mlx5_ifc_modify_cq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; enum { @@ -4486,83 +4486,83 @@ enum { struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select; struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_modify_cong_status_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_cong_status_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 priority[0x4]; u8 cong_protocol[0x4]; u8 enable[0x1]; u8 tag_enable[0x1]; - u8 reserved_3[0x1e]; + u8 reserved_at_62[0x1e]; }; struct mlx5_ifc_modify_cong_params_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_cong_params_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x1c]; + u8 reserved_at_40[0x1c]; u8 cong_protocol[0x4]; union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select; - u8 reserved_3[0x80]; + u8 reserved_at_80[0x80]; union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; }; struct mlx5_ifc_manage_pages_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 output_num_entries[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_60[0x20]; u8 pas[0][0x40]; }; @@ -4575,12 +4575,12 @@ enum { struct mlx5_ifc_manage_pages_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 function_id[0x10]; u8 input_num_entries[0x20]; @@ -4590,117 +4590,117 @@ struct mlx5_ifc_manage_pages_in_bits { struct mlx5_ifc_mad_ifc_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 response_mad_packet[256][0x8]; }; struct mlx5_ifc_mad_ifc_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 remote_lid[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_50[0x8]; u8 port[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 mad[256][0x8]; }; struct mlx5_ifc_init_hca_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_init_hca_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_init2rtr_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_init2rtr_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_init2init_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_init2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 opt_param_mask[0x20]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x80]; }; struct mlx5_ifc_get_dropped_packet_log_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 packet_headers_log[128][0x8]; @@ -4709,1029 +4709,1029 @@ struct mlx5_ifc_get_dropped_packet_log_out_bits { struct mlx5_ifc_get_dropped_packet_log_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_gen_eqe_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 eq_number[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 eqe[64][0x8]; }; struct mlx5_ifc_gen_eq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_enable_hca_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_40[0x20]; }; struct mlx5_ifc_enable_hca_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 function_id[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_drain_dct_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_drain_dct_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 dctn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_disable_hca_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x20]; + u8 reserved_at_40[0x20]; }; struct mlx5_ifc_disable_hca_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 function_id[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_detach_from_mcg_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 multicast_gid[16][0x8]; }; struct mlx5_ifc_destroy_xrc_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 xrc_srqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_tis_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_tis_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 tisn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_tir_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 tirn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 srqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_sq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_sq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 sqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_rqt_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rqtn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_rq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_rmp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 rmpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_psv_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_psv_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 psvn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_mkey_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_mkey_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 mkey_index[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_flow_table_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_flow_table_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x140]; + u8 reserved_at_c0[0x140]; }; struct mlx5_ifc_destroy_flow_group_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_flow_group_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; u8 group_id[0x20]; - u8 reserved_5[0x120]; + u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_destroy_eq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_eq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 eq_number[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_dct_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_dct_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 dctn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_cq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_delete_vxlan_udp_dport_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_delete_vxlan_udp_dport_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_40[0x20]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 vxlan_udp_port[0x10]; }; struct mlx5_ifc_delete_l2_table_entry_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_delete_l2_table_entry_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x60]; + u8 reserved_at_40[0x60]; - u8 reserved_3[0x8]; + u8 reserved_at_a0[0x8]; u8 table_index[0x18]; - u8 reserved_4[0x140]; + u8 reserved_at_c0[0x140]; }; struct mlx5_ifc_delete_fte_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_delete_fte_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x40]; + u8 reserved_at_c0[0x40]; u8 flow_index[0x20]; - u8 reserved_6[0xe0]; + u8 reserved_at_120[0xe0]; }; struct mlx5_ifc_dealloc_xrcd_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 xrcd[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_dealloc_uar_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_dealloc_uar_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 uar[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_dealloc_transport_domain_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_dealloc_transport_domain_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 transport_domain[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_dealloc_q_counter_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_dealloc_q_counter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_40[0x18]; u8 counter_set_id[0x8]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_dealloc_pd_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 pd[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_xrc_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 xrc_srqn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_tis_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 tisn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_tisc_bits ctx; }; struct mlx5_ifc_create_tir_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 tirn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_tirc_bits ctx; }; struct mlx5_ifc_create_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 srqn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_srqc_bits srq_context_entry; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_sq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 sqn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_sq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_sqc_bits ctx; }; struct mlx5_ifc_create_rqt_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 rqtn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_rqtc_bits rqt_context; }; struct mlx5_ifc_create_rq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 rqn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_rqc_bits ctx; }; struct mlx5_ifc_create_rmp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 rmpn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0xc0]; + u8 reserved_at_40[0xc0]; struct mlx5_ifc_rmpc_bits ctx; }; struct mlx5_ifc_create_qp_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 opt_param_mask[0x20]; - u8 reserved_3[0x20]; + u8 reserved_at_a0[0x20]; struct mlx5_ifc_qpc_bits qpc; - u8 reserved_4[0x80]; + u8 reserved_at_800[0x80]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_psv_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; - u8 reserved_2[0x8]; + u8 reserved_at_80[0x8]; u8 psv0_index[0x18]; - u8 reserved_3[0x8]; + u8 reserved_at_a0[0x8]; u8 psv1_index[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_c0[0x8]; u8 psv2_index[0x18]; - u8 reserved_5[0x8]; + u8 reserved_at_e0[0x8]; u8 psv3_index[0x18]; }; struct mlx5_ifc_create_psv_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 num_psv[0x4]; - u8 reserved_2[0x4]; + u8 reserved_at_44[0x4]; u8 pd[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_mkey_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 mkey_index[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_mkey_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_40[0x20]; u8 pg_access[0x1]; - u8 reserved_3[0x1f]; + u8 reserved_at_61[0x1f]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; - u8 reserved_4[0x80]; + u8 reserved_at_280[0x80]; u8 translations_octword_actual_size[0x20]; - u8 reserved_5[0x560]; + u8 reserved_at_320[0x560]; u8 klm_pas_mtt[0][0x20]; }; struct mlx5_ifc_create_flow_table_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 table_id[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_flow_table_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x20]; + u8 reserved_at_a0[0x20]; - u8 reserved_5[0x4]; + u8 reserved_at_c0[0x4]; u8 table_miss_mode[0x4]; u8 level[0x8]; - u8 reserved_6[0x8]; + u8 reserved_at_d0[0x8]; u8 log_size[0x8]; - u8 reserved_7[0x8]; + u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_8[0x100]; + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_create_flow_group_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 group_id[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; enum { @@ -5742,134 +5742,134 @@ enum { struct mlx5_ifc_create_flow_group_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x20]; + u8 reserved_at_c0[0x20]; u8 start_flow_index[0x20]; - u8 reserved_6[0x20]; + u8 reserved_at_100[0x20]; u8 end_flow_index[0x20]; - u8 reserved_7[0xa0]; + u8 reserved_at_140[0xa0]; - u8 reserved_8[0x18]; + u8 reserved_at_1e0[0x18]; u8 match_criteria_enable[0x8]; struct mlx5_ifc_fte_match_param_bits match_criteria; - u8 reserved_9[0xe00]; + u8 reserved_at_1200[0xe00]; }; struct mlx5_ifc_create_eq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x18]; + u8 reserved_at_40[0x18]; u8 eq_number[0x8]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_eq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_eqc_bits eq_context_entry; - u8 reserved_3[0x40]; + u8 reserved_at_280[0x40]; u8 event_bitmask[0x40]; - u8 reserved_4[0x580]; + u8 reserved_at_300[0x580]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_dct_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 dctn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_dct_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_dctc_bits dct_context_entry; - u8 reserved_3[0x180]; + u8 reserved_at_280[0x180]; }; struct mlx5_ifc_create_cq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_config_int_moderation_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x4]; + u8 reserved_at_40[0x4]; u8 min_delay[0xc]; u8 int_vector[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; enum { @@ -5879,49 +5879,49 @@ enum { struct mlx5_ifc_config_int_moderation_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x4]; + u8 reserved_at_40[0x4]; u8 min_delay[0xc]; u8 int_vector[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_attach_to_mcg_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; u8 multicast_gid[16][0x8]; }; struct mlx5_ifc_arm_xrc_srq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; enum { @@ -5930,25 +5930,25 @@ enum { struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 xrc_srqn[0x18]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 lwm[0x10]; }; struct mlx5_ifc_arm_rq_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; enum { @@ -5957,179 +5957,179 @@ enum { struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 srq_number[0x18]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 lwm[0x10]; }; struct mlx5_ifc_arm_dct_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_arm_dct_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_40[0x8]; u8 dct_number[0x18]; - u8 reserved_3[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_xrcd_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 xrcd[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_alloc_uar_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 uar[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_uar_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_alloc_transport_domain_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 transport_domain[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_transport_domain_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_alloc_q_counter_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x18]; + u8 reserved_at_40[0x18]; u8 counter_set_id[0x8]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_q_counter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_alloc_pd_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x8]; + u8 reserved_at_40[0x8]; u8 pd[0x18]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_add_vxlan_udp_dport_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_40[0x20]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 vxlan_udp_port[0x10]; }; struct mlx5_ifc_access_register_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; u8 register_data[0][0x20]; }; @@ -6141,12 +6141,12 @@ enum { struct mlx5_ifc_access_register_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_40[0x10]; u8 register_id[0x10]; u8 argument[0x20]; @@ -6159,24 +6159,24 @@ struct mlx5_ifc_sltp_reg_bits { u8 version[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; - u8 reserved_0[0x2]; + u8 reserved_at_12[0x2]; u8 lane[0x4]; - u8 reserved_1[0x8]; + u8 reserved_at_18[0x8]; - u8 reserved_2[0x20]; + u8 reserved_at_20[0x20]; - u8 reserved_3[0x7]; + u8 reserved_at_40[0x7]; u8 polarity[0x1]; u8 ob_tap0[0x8]; u8 ob_tap1[0x8]; u8 ob_tap2[0x8]; - u8 reserved_4[0xc]; + u8 reserved_at_60[0xc]; u8 ob_preemp_mode[0x4]; u8 ob_reg[0x8]; u8 ob_bias[0x8]; - u8 reserved_5[0x20]; + u8 reserved_at_80[0x20]; }; struct mlx5_ifc_slrg_reg_bits { @@ -6184,36 +6184,36 @@ struct mlx5_ifc_slrg_reg_bits { u8 version[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; - u8 reserved_0[0x2]; + u8 reserved_at_12[0x2]; u8 lane[0x4]; - u8 reserved_1[0x8]; + u8 reserved_at_18[0x8]; u8 time_to_link_up[0x10]; - u8 reserved_2[0xc]; + u8 reserved_at_30[0xc]; u8 grade_lane_speed[0x4]; u8 grade_version[0x8]; u8 grade[0x18]; - u8 reserved_3[0x4]; + u8 reserved_at_60[0x4]; u8 height_grade_type[0x4]; u8 height_grade[0x18]; u8 height_dz[0x10]; u8 height_dv[0x10]; - u8 reserved_4[0x10]; + u8 reserved_at_a0[0x10]; u8 height_sigma[0x10]; - u8 reserved_5[0x20]; + u8 reserved_at_c0[0x20]; - u8 reserved_6[0x4]; + u8 reserved_at_e0[0x4]; u8 phase_grade_type[0x4]; u8 phase_grade[0x18]; - u8 reserved_7[0x8]; + u8 reserved_at_100[0x8]; u8 phase_eo_pos[0x8]; - u8 reserved_8[0x8]; + u8 reserved_at_110[0x8]; u8 phase_eo_neg[0x8]; u8 ffe_set_tested[0x10]; @@ -6221,70 +6221,70 @@ struct mlx5_ifc_slrg_reg_bits { }; struct mlx5_ifc_pvlc_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x1c]; + u8 reserved_at_20[0x1c]; u8 vl_hw_cap[0x4]; - u8 reserved_3[0x1c]; + u8 reserved_at_40[0x1c]; u8 vl_admin[0x4]; - u8 reserved_4[0x1c]; + u8 reserved_at_60[0x1c]; u8 vl_operational[0x4]; }; struct mlx5_ifc_pude_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; - u8 reserved_0[0x4]; + u8 reserved_at_10[0x4]; u8 admin_status[0x4]; - u8 reserved_1[0x4]; + u8 reserved_at_18[0x4]; u8 oper_status[0x4]; - u8 reserved_2[0x60]; + u8 reserved_at_20[0x60]; }; struct mlx5_ifc_ptys_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0xd]; + u8 reserved_at_10[0xd]; u8 proto_mask[0x3]; - u8 reserved_2[0x40]; + u8 reserved_at_20[0x40]; u8 eth_proto_capability[0x20]; u8 ib_link_width_capability[0x10]; u8 ib_proto_capability[0x10]; - u8 reserved_3[0x20]; + u8 reserved_at_a0[0x20]; u8 eth_proto_admin[0x20]; u8 ib_link_width_admin[0x10]; u8 ib_proto_admin[0x10]; - u8 reserved_4[0x20]; + u8 reserved_at_100[0x20]; u8 eth_proto_oper[0x20]; u8 ib_link_width_oper[0x10]; u8 ib_proto_oper[0x10]; - u8 reserved_5[0x20]; + u8 reserved_at_160[0x20]; u8 eth_proto_lp_advertise[0x20]; - u8 reserved_6[0x60]; + u8 reserved_at_1a0[0x60]; }; struct mlx5_ifc_ptas_reg_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; u8 algorithm_options[0x10]; - u8 reserved_1[0x4]; + u8 reserved_at_30[0x4]; u8 repetitions_mode[0x4]; u8 num_of_repetitions[0x8]; @@ -6310,13 +6310,13 @@ struct mlx5_ifc_ptas_reg_bits { u8 ndeo_error_threshold[0x10]; u8 mixer_offset_step_size[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_110[0x8]; u8 mix90_phase_for_voltage_bath[0x8]; u8 mixer_offset_start[0x10]; u8 mixer_offset_end[0x10]; - u8 reserved_3[0x15]; + u8 reserved_at_140[0x15]; u8 ber_test_time[0xb]; }; @@ -6324,154 +6324,154 @@ struct mlx5_ifc_pspa_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; u8 sub_port[0x8]; - u8 reserved_0[0x8]; + u8 reserved_at_18[0x8]; - u8 reserved_1[0x20]; + u8 reserved_at_20[0x20]; }; struct mlx5_ifc_pqdr_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x5]; + u8 reserved_at_10[0x5]; u8 prio[0x3]; - u8 reserved_2[0x6]; + u8 reserved_at_18[0x6]; u8 mode[0x2]; - u8 reserved_3[0x20]; + u8 reserved_at_20[0x20]; - u8 reserved_4[0x10]; + u8 reserved_at_40[0x10]; u8 min_threshold[0x10]; - u8 reserved_5[0x10]; + u8 reserved_at_60[0x10]; u8 max_threshold[0x10]; - u8 reserved_6[0x10]; + u8 reserved_at_80[0x10]; u8 mark_probability_denominator[0x10]; - u8 reserved_7[0x60]; + u8 reserved_at_a0[0x60]; }; struct mlx5_ifc_ppsc_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x60]; + u8 reserved_at_20[0x60]; - u8 reserved_3[0x1c]; + u8 reserved_at_80[0x1c]; u8 wrps_admin[0x4]; - u8 reserved_4[0x1c]; + u8 reserved_at_a0[0x1c]; u8 wrps_status[0x4]; - u8 reserved_5[0x8]; + u8 reserved_at_c0[0x8]; u8 up_threshold[0x8]; - u8 reserved_6[0x8]; + u8 reserved_at_d0[0x8]; u8 down_threshold[0x8]; - u8 reserved_7[0x20]; + u8 reserved_at_e0[0x20]; - u8 reserved_8[0x1c]; + u8 reserved_at_100[0x1c]; u8 srps_admin[0x4]; - u8 reserved_9[0x1c]; + u8 reserved_at_120[0x1c]; u8 srps_status[0x4]; - u8 reserved_10[0x40]; + u8 reserved_at_140[0x40]; }; struct mlx5_ifc_pplr_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x8]; + u8 reserved_at_20[0x8]; u8 lb_cap[0x8]; - u8 reserved_3[0x8]; + u8 reserved_at_30[0x8]; u8 lb_en[0x8]; }; struct mlx5_ifc_pplm_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_20[0x20]; u8 port_profile_mode[0x8]; u8 static_port_profile[0x8]; u8 active_port_profile[0x8]; - u8 reserved_3[0x8]; + u8 reserved_at_58[0x8]; u8 retransmission_active[0x8]; u8 fec_mode_active[0x18]; - u8 reserved_4[0x20]; + u8 reserved_at_80[0x20]; }; struct mlx5_ifc_ppcnt_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; u8 pnat[0x2]; - u8 reserved_0[0x8]; + u8 reserved_at_12[0x8]; u8 grp[0x6]; u8 clr[0x1]; - u8 reserved_1[0x1c]; + u8 reserved_at_21[0x1c]; u8 prio_tc[0x3]; union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; struct mlx5_ifc_ppad_reg_bits { - u8 reserved_0[0x3]; + u8 reserved_at_0[0x3]; u8 single_mac[0x1]; - u8 reserved_1[0x4]; + u8 reserved_at_4[0x4]; u8 local_port[0x8]; u8 mac_47_32[0x10]; u8 mac_31_0[0x20]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_pmtu_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; u8 max_mtu[0x10]; - u8 reserved_2[0x10]; + u8 reserved_at_30[0x10]; u8 admin_mtu[0x10]; - u8 reserved_3[0x10]; + u8 reserved_at_50[0x10]; u8 oper_mtu[0x10]; - u8 reserved_4[0x10]; + u8 reserved_at_70[0x10]; }; struct mlx5_ifc_pmpr_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 module[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0x18]; + u8 reserved_at_20[0x18]; u8 attenuation_5g[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_40[0x18]; u8 attenuation_7g[0x8]; - u8 reserved_4[0x18]; + u8 reserved_at_60[0x18]; u8 attenuation_12g[0x8]; }; struct mlx5_ifc_pmpe_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 module[0x8]; - u8 reserved_1[0xc]; + u8 reserved_at_10[0xc]; u8 module_status[0x4]; - u8 reserved_2[0x60]; + u8 reserved_at_20[0x60]; }; struct mlx5_ifc_pmpc_reg_bits { @@ -6479,20 +6479,20 @@ struct mlx5_ifc_pmpc_reg_bits { }; struct mlx5_ifc_pmlpn_reg_bits { - u8 reserved_0[0x4]; + u8 reserved_at_0[0x4]; u8 mlpn_status[0x4]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; u8 e[0x1]; - u8 reserved_2[0x1f]; + u8 reserved_at_21[0x1f]; }; struct mlx5_ifc_pmlp_reg_bits { u8 rxtx[0x1]; - u8 reserved_0[0x7]; + u8 reserved_at_1[0x7]; u8 local_port[0x8]; - u8 reserved_1[0x8]; + u8 reserved_at_10[0x8]; u8 width[0x8]; u8 lane0_module_mapping[0x20]; @@ -6503,36 +6503,36 @@ struct mlx5_ifc_pmlp_reg_bits { u8 lane3_module_mapping[0x20]; - u8 reserved_2[0x160]; + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_pmaos_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 module[0x8]; - u8 reserved_1[0x4]; + u8 reserved_at_10[0x4]; u8 admin_status[0x4]; - u8 reserved_2[0x4]; + u8 reserved_at_18[0x4]; u8 oper_status[0x4]; u8 ase[0x1]; u8 ee[0x1]; - u8 reserved_3[0x1c]; + u8 reserved_at_22[0x1c]; u8 e[0x2]; - u8 reserved_4[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_plpc_reg_bits { - u8 reserved_0[0x4]; + u8 reserved_at_0[0x4]; u8 profile_id[0xc]; - u8 reserved_1[0x4]; + u8 reserved_at_10[0x4]; u8 proto_mask[0x4]; - u8 reserved_2[0x8]; + u8 reserved_at_18[0x8]; - u8 reserved_3[0x10]; + u8 reserved_at_20[0x10]; u8 lane_speed[0x10]; - u8 reserved_4[0x17]; + u8 reserved_at_40[0x17]; u8 lpbf[0x1]; u8 fec_mode_policy[0x8]; @@ -6545,44 +6545,44 @@ struct mlx5_ifc_plpc_reg_bits { u8 retransmission_request_admin[0x8]; u8 fec_mode_request_admin[0x18]; - u8 reserved_5[0x80]; + u8 reserved_at_c0[0x80]; }; struct mlx5_ifc_plib_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x8]; + u8 reserved_at_10[0x8]; u8 ib_port[0x8]; - u8 reserved_2[0x60]; + u8 reserved_at_20[0x60]; }; struct mlx5_ifc_plbf_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0xd]; + u8 reserved_at_10[0xd]; u8 lbf_mode[0x3]; - u8 reserved_2[0x20]; + u8 reserved_at_20[0x20]; }; struct mlx5_ifc_pipg_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; u8 dic[0x1]; - u8 reserved_2[0x19]; + u8 reserved_at_21[0x19]; u8 ipg[0x4]; - u8 reserved_3[0x2]; + u8 reserved_at_3e[0x2]; }; struct mlx5_ifc_pifr_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0xe0]; + u8 reserved_at_20[0xe0]; u8 port_filter[8][0x20]; @@ -6590,36 +6590,36 @@ struct mlx5_ifc_pifr_reg_bits { }; struct mlx5_ifc_pfcc_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; u8 ppan[0x4]; - u8 reserved_2[0x4]; + u8 reserved_at_24[0x4]; u8 prio_mask_tx[0x8]; - u8 reserved_3[0x8]; + u8 reserved_at_30[0x8]; u8 prio_mask_rx[0x8]; u8 pptx[0x1]; u8 aptx[0x1]; - u8 reserved_4[0x6]; + u8 reserved_at_42[0x6]; u8 pfctx[0x8]; - u8 reserved_5[0x10]; + u8 reserved_at_50[0x10]; u8 pprx[0x1]; u8 aprx[0x1]; - u8 reserved_6[0x6]; + u8 reserved_at_62[0x6]; u8 pfcrx[0x8]; - u8 reserved_7[0x10]; + u8 reserved_at_70[0x10]; - u8 reserved_8[0x80]; + u8 reserved_at_80[0x80]; }; struct mlx5_ifc_pelc_reg_bits { u8 op[0x4]; - u8 reserved_0[0x4]; + u8 reserved_at_4[0x4]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; u8 op_admin[0x8]; u8 op_capability[0x8]; @@ -6634,28 +6634,28 @@ struct mlx5_ifc_pelc_reg_bits { u8 active[0x40]; - u8 reserved_2[0x80]; + u8 reserved_at_140[0x80]; }; struct mlx5_ifc_peir_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_2[0xc]; + u8 reserved_at_20[0xc]; u8 error_count[0x4]; - u8 reserved_3[0x10]; + u8 reserved_at_30[0x10]; - u8 reserved_4[0xc]; + u8 reserved_at_40[0xc]; u8 lane[0x4]; - u8 reserved_5[0x8]; + u8 reserved_at_50[0x8]; u8 error_type[0x8]; }; struct mlx5_ifc_pcap_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_1[0x10]; + u8 reserved_at_10[0x10]; u8 port_capability_mask[4][0x20]; }; @@ -6663,46 +6663,46 @@ struct mlx5_ifc_pcap_reg_bits { struct mlx5_ifc_paos_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; - u8 reserved_0[0x4]; + u8 reserved_at_10[0x4]; u8 admin_status[0x4]; - u8 reserved_1[0x4]; + u8 reserved_at_18[0x4]; u8 oper_status[0x4]; u8 ase[0x1]; u8 ee[0x1]; - u8 reserved_2[0x1c]; + u8 reserved_at_22[0x1c]; u8 e[0x2]; - u8 reserved_3[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_pamp_reg_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 opamp_group[0x8]; - u8 reserved_1[0xc]; + u8 reserved_at_10[0xc]; u8 opamp_group_type[0x4]; u8 start_index[0x10]; - u8 reserved_2[0x4]; + u8 reserved_at_30[0x4]; u8 num_of_indices[0xc]; u8 index_data[18][0x10]; }; struct mlx5_ifc_lane_2_module_mapping_bits { - u8 reserved_0[0x6]; + u8 reserved_at_0[0x6]; u8 rx_lane[0x2]; - u8 reserved_1[0x6]; + u8 reserved_at_8[0x6]; u8 tx_lane[0x2]; - u8 reserved_2[0x8]; + u8 reserved_at_10[0x8]; u8 module[0x8]; }; struct mlx5_ifc_bufferx_reg_bits { - u8 reserved_0[0x6]; + u8 reserved_at_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; - u8 reserved_1[0xc]; + u8 reserved_at_8[0xc]; u8 size[0xc]; u8 xoff_threshold[0x10]; @@ -6714,21 +6714,21 @@ struct mlx5_ifc_set_node_in_bits { }; struct mlx5_ifc_register_power_settings_bits { - u8 reserved_0[0x18]; + u8 reserved_at_0[0x18]; u8 power_settings_level[0x8]; - u8 reserved_1[0x60]; + u8 reserved_at_20[0x60]; }; struct mlx5_ifc_register_host_endianness_bits { u8 he[0x1]; - u8 reserved_0[0x1f]; + u8 reserved_at_1[0x1f]; - u8 reserved_1[0x60]; + u8 reserved_at_20[0x60]; }; struct mlx5_ifc_umr_pointer_desc_argument_bits { - u8 reserved_0[0x20]; + u8 reserved_at_0[0x20]; u8 mkey[0x20]; @@ -6741,7 +6741,7 @@ struct mlx5_ifc_ud_adrs_vector_bits { u8 dc_key[0x40]; u8 ext[0x1]; - u8 reserved_0[0x7]; + u8 reserved_at_41[0x7]; u8 destination_qp_dct[0x18]; u8 static_rate[0x4]; @@ -6750,7 +6750,7 @@ struct mlx5_ifc_ud_adrs_vector_bits { u8 mlid[0x7]; u8 rlid_udp_sport[0x10]; - u8 reserved_1[0x20]; + u8 reserved_at_80[0x20]; u8 rmac_47_16[0x20]; @@ -6758,9 +6758,9 @@ struct mlx5_ifc_ud_adrs_vector_bits { u8 tclass[0x8]; u8 hop_limit[0x8]; - u8 reserved_2[0x1]; + u8 reserved_at_e0[0x1]; u8 grh[0x1]; - u8 reserved_3[0x2]; + u8 reserved_at_e2[0x2]; u8 src_addr_index[0x8]; u8 flow_label[0x14]; @@ -6768,27 +6768,27 @@ struct mlx5_ifc_ud_adrs_vector_bits { }; struct mlx5_ifc_pages_req_event_bits { - u8 reserved_0[0x10]; + u8 reserved_at_0[0x10]; u8 function_id[0x10]; u8 num_pages[0x20]; - u8 reserved_1[0xa0]; + u8 reserved_at_40[0xa0]; }; struct mlx5_ifc_eqe_bits { - u8 reserved_0[0x8]; + u8 reserved_at_0[0x8]; u8 event_type[0x8]; - u8 reserved_1[0x8]; + u8 reserved_at_10[0x8]; u8 event_sub_type[0x8]; - u8 reserved_2[0xe0]; + u8 reserved_at_20[0xe0]; union mlx5_ifc_event_auto_bits event_data; - u8 reserved_3[0x10]; + u8 reserved_at_1e0[0x10]; u8 signature[0x8]; - u8 reserved_4[0x7]; + u8 reserved_at_1f8[0x7]; u8 owner[0x1]; }; @@ -6798,14 +6798,14 @@ enum { struct mlx5_ifc_cmd_queue_entry_bits { u8 type[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 input_length[0x20]; u8 input_mailbox_pointer_63_32[0x20]; u8 input_mailbox_pointer_31_9[0x17]; - u8 reserved_1[0x9]; + u8 reserved_at_77[0x9]; u8 command_input_inline_data[16][0x8]; @@ -6814,20 +6814,20 @@ struct mlx5_ifc_cmd_queue_entry_bits { u8 output_mailbox_pointer_63_32[0x20]; u8 output_mailbox_pointer_31_9[0x17]; - u8 reserved_2[0x9]; + u8 reserved_at_1b7[0x9]; u8 output_length[0x20]; u8 token[0x8]; u8 signature[0x8]; - u8 reserved_3[0x8]; + u8 reserved_at_1f0[0x8]; u8 status[0x7]; u8 ownership[0x1]; }; struct mlx5_ifc_cmd_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; @@ -6836,9 +6836,9 @@ struct mlx5_ifc_cmd_out_bits { struct mlx5_ifc_cmd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 command[0][0x20]; @@ -6847,16 +6847,16 @@ struct mlx5_ifc_cmd_in_bits { struct mlx5_ifc_cmd_if_box_bits { u8 mailbox_data[512][0x8]; - u8 reserved_0[0x180]; + u8 reserved_at_1000[0x180]; u8 next_pointer_63_32[0x20]; u8 next_pointer_31_10[0x16]; - u8 reserved_1[0xa]; + u8 reserved_at_11b6[0xa]; u8 block_number[0x20]; - u8 reserved_2[0x8]; + u8 reserved_at_11e0[0x8]; u8 token[0x8]; u8 ctrl_signature[0x8]; u8 signature[0x8]; @@ -6866,7 +6866,7 @@ struct mlx5_ifc_mtt_bits { u8 ptag_63_32[0x20]; u8 ptag_31_8[0x18]; - u8 reserved_0[0x6]; + u8 reserved_at_38[0x6]; u8 wr_en[0x1]; u8 rd_en[0x1]; }; @@ -6904,38 +6904,38 @@ struct mlx5_ifc_initial_seg_bits { u8 cmd_interface_rev[0x10]; u8 fw_rev_subminor[0x10]; - u8 reserved_0[0x40]; + u8 reserved_at_40[0x40]; u8 cmdq_phy_addr_63_32[0x20]; u8 cmdq_phy_addr_31_12[0x14]; - u8 reserved_1[0x2]; + u8 reserved_at_b4[0x2]; u8 nic_interface[0x2]; u8 log_cmdq_size[0x4]; u8 log_cmdq_stride[0x4]; u8 command_doorbell_vector[0x20]; - u8 reserved_2[0xf00]; + u8 reserved_at_e0[0xf00]; u8 initializing[0x1]; - u8 reserved_3[0x4]; + u8 reserved_at_fe1[0x4]; u8 nic_interface_supported[0x3]; - u8 reserved_4[0x18]; + u8 reserved_at_fe8[0x18]; struct mlx5_ifc_health_buffer_bits health_buffer; u8 no_dram_nic_offset[0x20]; - u8 reserved_5[0x6e40]; + u8 reserved_at_1220[0x6e40]; - u8 reserved_6[0x1f]; + u8 reserved_at_8060[0x1f]; u8 clear_int[0x1]; u8 health_syndrome[0x8]; u8 health_counter[0x18]; - u8 reserved_7[0x17fc0]; + u8 reserved_at_80a0[0x17fc0]; }; union mlx5_ifc_ports_control_registers_document_bits { @@ -6980,44 +6980,44 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pvlc_reg_bits pvlc_reg; struct mlx5_ifc_slrg_reg_bits slrg_reg; struct mlx5_ifc_sltp_reg_bits sltp_reg; - u8 reserved_0[0x60e0]; + u8 reserved_at_0[0x60e0]; }; union mlx5_ifc_debug_enhancements_document_bits { struct mlx5_ifc_health_buffer_bits health_buffer; - u8 reserved_0[0x200]; + u8 reserved_at_0[0x200]; }; union mlx5_ifc_uplink_pci_interface_document_bits { struct mlx5_ifc_initial_seg_bits initial_seg; - u8 reserved_0[0x20060]; + u8 reserved_at_0[0x20060]; }; struct mlx5_ifc_set_flow_table_root_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_flow_table_root_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x40]; u8 table_type[0x8]; - u8 reserved_3[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_4[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_5[0x140]; + u8 reserved_at_c0[0x140]; }; enum { @@ -7026,39 +7026,39 @@ enum { struct mlx5_ifc_modify_flow_table_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x40]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_modify_flow_table_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x20]; + u8 reserved_at_40[0x20]; - u8 reserved_3[0x10]; + u8 reserved_at_60[0x10]; u8 modify_field_select[0x10]; u8 table_type[0x8]; - u8 reserved_4[0x18]; + u8 reserved_at_88[0x18]; - u8 reserved_5[0x8]; + u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_6[0x4]; + u8 reserved_at_c0[0x4]; u8 table_miss_mode[0x4]; - u8 reserved_7[0x18]; + u8 reserved_at_c8[0x18]; - u8 reserved_8[0x8]; + u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_9[0x100]; + u8 reserved_at_100[0x100]; }; #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From 85743f1eb34548ba4b056d2f184a3d107a3b8917 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 17 Feb 2016 17:24:26 +0200 Subject: net/mlx4_core: Set UAR page size to 4KB regardless of system page size problem description: The current code sets UAR page size equal to system page size. The ConnectX-3 and ConnectX-3 Pro HWs require minimum 128 UAR pages. The mlx4 kernel drivers are not loaded if there is less than 128 UAR pages. solution: Always set UAR page to 4KB. This allows more UAR pages if the OS has PAGE_SIZE larger than 4KB. For example, PowerPC kernel use 64KB system page size, with 4MB uar region, there are 4MB/2/64KB = 32 uars (half for uar, half for blueflame). This does not meet minimum 128 UAR pages requirement. With 4KB UAR page, there are 4MB/2/4KB = 512 uars which meet the minimum requirement. Note that only codes in mlx4_core that deal with firmware know that uar page size is 4KB. Codes that deal with usr page in cq and qp context (mlx4_ib, mlx4_en and part of mlx4_core) still have the same assumption that uar page size equals to system page size. Note that with this implementation, on 64KB system page size kernel, there are 16 uars per system page but only one uars is used. The other 15 uars are ignored because of the above assumption. Regarding SR-IOV, mlx4_core in hypervisor will set the uar page size to 4KB and mlx4_core code in virtual OS will obtain the uar page size from firmware. Regarding backward compatibility in SR-IOV, if hypervisor has this new code, the virtual OS must be updated. If hypervisor has old code, and the virtual OS has this new code, the new code will be backward compatible with the old code. If the uar size is big enough, this new code in VF continues to work with 64 KB uar page size (on PowerPc kernel). If the uar size does not meet 128 uars requirement, this new code not loaded in VF and print the same error message as the old code in Hypervisor. Signed-off-by: Huy Nguyen Reviewed-by: Yishai Hadas Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/qp.c | 7 ++- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 +- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 3 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 4 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 7 +-- drivers/net/ethernet/mellanox/mlx4/main.c | 56 ++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx4/pd.c | 12 +++-- include/linux/mlx4/device.h | 13 ++++++ 8 files changed, 84 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bc5536f00b6c..fd97534762b8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1681,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (qp->ibqp.uobject) - context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); + context->usr_page = cpu_to_be32( + mlx4_to_hw_uar_index(dev->dev, + to_mucontext(ibqp->uobject->context)->uar.index)); else - context->usr_page = cpu_to_be32(dev->priv_uar.index); + context->usr_page = cpu_to_be32( + mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); if (attr_mask & IB_QP_DEST_QPN) context->remote_qpn = cpu_to_be32(attr->dest_qp_num); diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 3348e646db70..a849da92f857 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -318,7 +318,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (timestamp_en) cq_context->flags |= cpu_to_be32(1 << 19); - cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); + cq_context->logsize_usrpage = + cpu_to_be32((ilog2(nent) << 24) | + mlx4_to_hw_uar_index(dev, uar->index)); cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 12aab5a659d3..02e925d6f734 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -58,7 +58,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; } - context->usr_page = cpu_to_be32(mdev->priv_uar.index); + context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev, + mdev->priv_uar.index)); context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4421bf5463f6..e0946ab22010 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -213,7 +213,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, ring->cqn, user_prio, &ring->context); if (ring->bf_alloced) - ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); + ring->context.usr_page = + cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev, + ring->bf.uar->index)); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 4696053165f8..f613977455e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -940,9 +940,10 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->persist->pdev, 2) + - ((eq->eqn / 4) << PAGE_SHIFT), - PAGE_SIZE); + ioremap( + pci_resource_start(dev->persist->pdev, 2) + + ((eq->eqn / 4) << (dev->uar_page_shift)), + (1 << (dev->uar_page_shift))); if (!priv->eq_table.uar_map[index]) { mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n", eq->eqn); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index f1b6d219e445..2cc3c626c3fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -168,6 +168,20 @@ struct mlx4_port_config { static atomic_t pf_loading = ATOMIC_INIT(0); +static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + /* The reserved_uars is calculated by system page size unit. + * Therefore, adjustment is added when the uar page size is less + * than the system page size + */ + dev->caps.reserved_uars = + max_t(int, + mlx4_get_num_reserved_uar(dev), + dev_cap->reserved_uars / + (1 << (PAGE_SHIFT - dev->uar_page_shift))); +} + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -386,8 +400,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mrws = dev_cap->reserved_mrws; - /* The first 128 UARs are used for EQ doorbells */ - dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->reserved_xrcds : 0; @@ -405,6 +417,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + /* Save uar page shift */ + if (!mlx4_is_slave(dev)) { + /* Virtual PCI function needs to determine UAR page size from + * firmware. Only master PCI function can set the uar page size + */ + dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; + mlx4_set_num_reserved_uars(dev, dev_cap); + } + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { struct mlx4_init_hca_param hca_param; @@ -815,16 +836,25 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } - /* slave gets uar page size from QUERY_HCA fw command */ - dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); + /* Set uar_page_shift for VF */ + dev->uar_page_shift = hca_param.uar_page_sz + 12; - /* TODO: relax this assumption */ - if (dev->caps.uar_page_size != PAGE_SIZE) { - mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n", - dev->caps.uar_page_size, PAGE_SIZE); - return -ENODEV; + /* Make sure the master uar page size is valid */ + if (dev->uar_page_shift > PAGE_SHIFT) { + mlx4_err(dev, + "Invalid configuration: uar page size is larger than system page size\n"); + return -ENODEV; } + /* Set reserved_uars based on the uar_page_shift */ + mlx4_set_num_reserved_uars(dev, &dev_cap); + + /* Although uar page size in FW differs from system page size, + * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) + * still works with assumption that uar page size == system page size + */ + dev->caps.uar_page_size = PAGE_SIZE; + memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { @@ -2179,8 +2209,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; - init_hca.log_uar_sz = ilog2(dev->caps.num_uars); - init_hca.uar_page_sz = PAGE_SHIFT - 12; + /* Always set UAR page size 4KB, set log_uar_sz accordingly */ + init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + + PAGE_SHIFT - + DEFAULT_UAR_PAGE_SHIFT; + init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; + init_hca.mw_enabled = 0; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 609c59dc854e..b3cc3ab63799 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -269,9 +269,15 @@ EXPORT_SYMBOL_GPL(mlx4_bf_free); int mlx4_init_uar_table(struct mlx4_dev *dev) { - if (dev->caps.num_uars <= 128) { - mlx4_err(dev, "Only %d UAR pages (need more than 128)\n", - dev->caps.num_uars); + int num_reserved_uar = mlx4_get_num_reserved_uar(dev); + + mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift); + mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars); + + if (dev->caps.num_uars <= num_reserved_uar) { + mlx4_err( + dev, "Only %d UAR pages (need more than %d)\n", + dev->caps.num_uars, num_reserved_uar); mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); return -ENODEV; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 430a929f048b..a0e8cc8dcc67 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -44,6 +44,8 @@ #include +#define DEFAULT_UAR_PAGE_SHIFT 12 + #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 #define MIN_MSIX_P_PORT 5 @@ -856,6 +858,7 @@ struct mlx4_dev { u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; + u8 uar_page_shift; }; struct mlx4_clock_params { @@ -1528,4 +1531,14 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_clock_params *params); +static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index) +{ + return (index << (PAGE_SHIFT - dev->uar_page_shift)); +} + +static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev) +{ + /* The first 128 UARs are used for EQ doorbells */ + return (128 >> (PAGE_SHIFT - dev->uar_page_shift)); +} #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 7dcd182bec271ab341b05b66b6006995795fc0e7 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 16 Feb 2016 17:32:33 -0500 Subject: ftrace/module: remove ftrace module notifier Remove the ftrace module notifier in favor of directly calling ftrace_module_enable() and ftrace_release_mod() in the module loader. Hard-coding the function calls directly in the module loader removes dependence on the module notifier call chain and provides better visibility and control over what gets called when, which is important to kernel utilities such as livepatch. This fixes a notifier ordering issue in which the ftrace module notifier (and hence ftrace_module_enable()) for coming modules was being called after klp_module_notify(), which caused livepatch modules to initialize incorrectly. This patch removes dependence on the module notifier call chain in favor of hard coding the corresponding function calls in the module loader. This ensures that ftrace and livepatch code get called in the correct order on patch module load and unload. Fixes: 5156dca34a3e ("ftrace: Fix the race between ftrace and insmod") Signed-off-by: Jessica Yu Reviewed-by: Steven Rostedt Reviewed-by: Petr Mladek Acked-by: Rusty Russell Reviewed-by: Josh Poimboeuf Reviewed-by: Miroslav Benes Signed-off-by: Jiri Kosina --- include/linux/ftrace.h | 6 ++++-- kernel/module.c | 4 ++++ kernel/trace/ftrace.c | 36 +----------------------------------- 3 files changed, 9 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0639dcc98195..76dc15e171eb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -604,6 +604,7 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); extern void ftrace_module_init(struct module *mod); +extern void ftrace_module_enable(struct module *mod); extern void ftrace_release_mod(struct module *mod); extern void ftrace_disable_daemon(void); @@ -613,8 +614,9 @@ static inline int skip_trace(unsigned long ip) { return 0; } static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } static inline void ftrace_enable_daemon(void) { } -static inline void ftrace_release_mod(struct module *mod) {} -static inline void ftrace_module_init(struct module *mod) {} +static inline void ftrace_module_init(struct module *mod) { } +static inline void ftrace_module_enable(struct module *mod) { } +static inline void ftrace_release_mod(struct module *mod) { } static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; diff --git a/kernel/module.c b/kernel/module.c index 8358f4697c0c..b05d466cfa02 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -981,6 +981,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, mod->exit(); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + ftrace_release_mod(mod); + async_synchronize_full(); /* Store the name of the last unloaded module for diagnostic purposes */ @@ -3295,6 +3297,7 @@ fail: module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + ftrace_release_mod(mod); free_module(mod); wake_up_all(&module_wq); return ret; @@ -3371,6 +3374,7 @@ static int complete_formation(struct module *mod, struct load_info *info) mod->state = MODULE_STATE_COMING; mutex_unlock(&module_mutex); + ftrace_module_enable(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); return 0; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eca592f977b2..57a6eea84694 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4961,7 +4961,7 @@ void ftrace_release_mod(struct module *mod) mutex_unlock(&ftrace_lock); } -static void ftrace_module_enable(struct module *mod) +void ftrace_module_enable(struct module *mod) { struct dyn_ftrace *rec; struct ftrace_page *pg; @@ -5038,38 +5038,8 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); } - -static int ftrace_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - ftrace_module_enable(mod); - break; - case MODULE_STATE_GOING: - ftrace_release_mod(mod); - break; - default: - break; - } - - return 0; -} -#else -static int ftrace_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_nb = { - .notifier_call = ftrace_module_notify, - .priority = INT_MIN, /* Run after anything that can remove kprobes */ -}; - void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; @@ -5098,10 +5068,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_nb); - if (ret) - pr_warning("Failed to register trace ftrace module exit notifier\n"); - set_ftrace_early_filters(); return; -- cgit v1.2.3 From deed49df7390d5239024199e249190328f1651e7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 18 Feb 2016 21:21:19 +0800 Subject: route: check and remove route cache when we get route Since the gc of ipv4 route was removed, the route cached would has no chance to be removed, and even it has been timeout, it still could be used, cause no code to check it's expires. Fix this issue by checking and removing route cache when we get route. Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/route.c | 77 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 64 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7029527725dd..4079fc18ffe4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -61,6 +61,7 @@ struct fib_nh_exception { struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; + struct rcu_head rcu; }; struct fnhe_hash_bucket { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85f184e429c6..02c62299d717 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. */ @@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, 0); + 0, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) + if (fnhe) { rth = rcu_dereference(fnhe->fnhe_rth_input); - else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(&FIB_RES_NH(*res), daddr); + fnhe = NULL; + } else { + goto rt_cache; + } + } + + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); +rt_cache: if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); - if (fnhe) + if (fnhe) { prth = &fnhe->fnhe_rth_output; - else { - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; + rth = rcu_dereference(*prth); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(nh, fl4->daddr); + fnhe = NULL; + } else { + goto rt_cache; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } + + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); + +rt_cache: if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; @@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; -- cgit v1.2.3 From 7716682cc58e305e22207d5bb315f26af6b1e243 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Feb 2016 05:39:18 -0800 Subject: tcp/dccp: fix another race at listener dismantle Ilya reported following lockdep splat: kernel: ========================= kernel: [ BUG: held lock freed! ] kernel: 4.5.0-rc1-ceph-00026-g5e0a311 #1 Not tainted kernel: ------------------------- kernel: swapper/5/0 is freeing memory ffff880035c9d200-ffff880035c9dbff, with a lock still held there! kernel: (&(&queue->rskq_lock)->rlock){+.-...}, at: [] inet_csk_reqsk_queue_add+0x28/0xa0 kernel: 4 locks held by swapper/5/0: kernel: #0: (rcu_read_lock){......}, at: [] netif_receive_skb_internal+0x4b/0x1f0 kernel: #1: (rcu_read_lock){......}, at: [] ip_local_deliver_finish+0x3f/0x380 kernel: #2: (slock-AF_INET){+.-...}, at: [] sk_clone_lock+0x19b/0x440 kernel: #3: (&(&queue->rskq_lock)->rlock){+.-...}, at: [] inet_csk_reqsk_queue_add+0x28/0xa0 To properly fix this issue, inet_csk_reqsk_queue_add() needs to return to its callers if the child as been queued into accept queue. We also need to make sure listener is still there before calling sk->sk_data_ready(), by holding a reference on it, since the reference carried by the child can disappear as soon as the child is put on accept queue. Reported-by: Ilya Dryomov Fixes: ebb516af60e1 ("tcp/dccp: fix race at listener dismantle phase") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 5 +++-- net/dccp/ipv4.c | 14 +++++++------- net/dccp/ipv6.c | 14 +++++++------- net/ipv4/inet_connection_sock.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 14 +++++++------- net/ipv6/tcp_ipv6.c | 14 +++++++------- 6 files changed, 38 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 481fe1c9044c..49dcad4fe99e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -270,8 +270,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, const struct request_sock *req); -void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, - struct sock *child); +struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 5684e14932bd..902d606324a0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -824,26 +824,26 @@ lookup: if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v4_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9c6d0508e63a..b8608b71a66d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -691,26 +691,26 @@ lookup: if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v6_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 46b9c887bede..64148914803a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, reqsk_put(req); } -void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, - struct sock *child) +struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; spin_lock(&queue->rskq_lock); if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_child_forget(sk, req, child); + child = NULL; } else { req->sk = child; req->dl_next = NULL; @@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, sk_acceptq_added(sk); } spin_unlock(&queue->rskq_lock); + return child; } EXPORT_SYMBOL(inet_csk_reqsk_queue_add); @@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, if (own_req) { inet_csk_reqsk_queue_drop(sk, req); reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); - inet_csk_reqsk_queue_add(sk, req, child); - /* Warning: caller must not call reqsk_put(req); - * child stole last reference on it. - */ - return child; + if (inet_csk_reqsk_queue_add(sk, req, child)) + return child; } /* Too bad, another child took ownership of the request, undo. */ bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c84477949d3a..487ac67059e2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1597,30 +1597,30 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1a5a70fb8551..5c8c84273028 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1387,7 +1387,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); @@ -1395,24 +1395,24 @@ process: reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); tcp_v6_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } -- cgit v1.2.3 From 13d34ac6e55b8284c592c67e166ac614b3c4c1d7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 17 Feb 2016 13:11:18 -0800 Subject: Revert "fsnotify: destroy marks with call_srcu instead of dedicated thread" This reverts commit c510eff6beba ("fsnotify: destroy marks with call_srcu instead of dedicated thread"). Eryu reported that he was seeing some OOM kills kick in when running a testcase that adds and removes inotify marks on a file in a tight loop. The above commit changed the code to use call_srcu to clean up the marks. While that does (in principle) work, the srcu callback job is limited to cleaning up entries in small batches and only once per jiffy. It's easily possible to overwhelm that machinery with too many call_srcu callbacks, and Eryu's reproduer did just that. There's also another potential problem with using call_srcu here. While you can obviously sleep while holding the srcu_read_lock, the callbacks run under local_bh_disable, so you can't sleep there. It's possible when putting the last reference to the fsnotify_mark that we'll end up putting a chain of references including the fsnotify_group, uid, and associated keys. While I don't see any obvious ways that that could occurs, it's probably still best to avoid using call_srcu here after all. This patch reverts the above patch. A later patch will take a different approach to eliminated the dedicated thread here. Signed-off-by: Jeff Layton Reported-by: Eryu Guan Tested-by: Eryu Guan Cc: Jan Kara Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/mark.c | 66 +++++++++++++++++++++++++++++++--------- include/linux/fsnotify_backend.h | 5 +-- 2 files changed, 53 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/notify/mark.c b/fs/notify/mark.c index cfcbf114676e..fc0df4442f7b 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -92,6 +92,9 @@ #include "fsnotify.h" struct srcu_struct fsnotify_mark_srcu; +static DEFINE_SPINLOCK(destroy_lock); +static LIST_HEAD(destroy_list); +static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); void fsnotify_get_mark(struct fsnotify_mark *mark) { @@ -165,19 +168,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) atomic_dec(&group->num_marks); } -static void -fsnotify_mark_free_rcu(struct rcu_head *rcu) -{ - struct fsnotify_mark *mark; - - mark = container_of(rcu, struct fsnotify_mark, g_rcu); - fsnotify_put_mark(mark); -} - /* - * Free fsnotify mark. The freeing is actually happening from a call_srcu - * callback. Caller must have a reference to the mark or be protected by - * fsnotify_mark_srcu. + * Free fsnotify mark. The freeing is actually happening from a kthread which + * first waits for srcu period end. Caller must have a reference to the mark + * or be protected by fsnotify_mark_srcu. */ void fsnotify_free_mark(struct fsnotify_mark *mark) { @@ -192,7 +186,10 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); - call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + wake_up(&destroy_waitq); /* * Some groups like to know that marks are being freed. This is a @@ -388,7 +385,11 @@ err: spin_unlock(&mark->lock); - call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + wake_up(&destroy_waitq); + return ret; } @@ -491,3 +492,40 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, atomic_set(&mark->refcnt, 1); mark->free_mark = free_mark; } + +static int fsnotify_mark_destroy(void *ignored) +{ + struct fsnotify_mark *mark, *next; + struct list_head private_destroy_list; + + for (;;) { + spin_lock(&destroy_lock); + /* exchange the list head */ + list_replace_init(&destroy_list, &private_destroy_list); + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + + list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { + list_del_init(&mark->g_list); + fsnotify_put_mark(mark); + } + + wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); + } + + return 0; +} + +static int __init fsnotify_mark_init(void) +{ + struct task_struct *thread; + + thread = kthread_run(fsnotify_mark_destroy, NULL, + "fsnotify_mark"); + if (IS_ERR(thread)) + panic("unable to start fsnotify mark destruction thread."); + + return 0; +} +device_initcall(fsnotify_mark_init); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6b7e89f45aa4..533c4408529a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -220,10 +220,7 @@ struct fsnotify_mark { /* List of marks by group->i_fsnotify_marks. Also reused for queueing * mark into destroy_list when it's waiting for the end of SRCU period * before it can be freed. [group->mark_mutex] */ - union { - struct list_head g_list; - struct rcu_head g_rcu; - }; + struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; /* List of marks for inode / vfsmount [obj_lock] */ -- cgit v1.2.3 From 5fff80bbdb6b84a94f90391ba674471d37b57eb2 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 17 Feb 2016 08:32:05 +0100 Subject: drm/atomic: Allow for holes in connector state, v2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because we record connector_mask using 1 << drm_connector_index now the connector_mask should stay the same even when other connectors are removed. This was not the case with MST, in that case when removing a connector all other connectors may change their index. This is fixed by waiting until the first get_connector_state to allocate connector_state, and force reallocation when state is too small. As a side effect connector arrays no longer have to be preallocated, and can be allocated on first use which means a less allocations in the page flip only path. Changes since v1: - Whitespace. (Ville) - Call ida_remove when destroying the connector. (Ville) - u32 alloc -> int. (Ville) Fixes: 14de6c44d149 ("drm/atomic: Remove drm_atomic_connectors_for_crtc.") Signed-off-by: Maarten Lankhorst Cc: Ville Syrjälä Reviewed-by: Lyude Reviewed-by: Ville Syrjälä Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_atomic.c | 44 +++++++++++++++------------------ drivers/gpu/drm/drm_atomic_helper.c | 2 +- drivers/gpu/drm/drm_crtc.c | 49 +++++++++++++++---------------------- include/drm/drm_crtc.h | 8 +++++- 4 files changed, 48 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 3f74193885f1..9a7b44616b55 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -65,8 +65,6 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) */ state->allow_modeset = true; - state->num_connector = ACCESS_ONCE(dev->mode_config.num_connector); - state->crtcs = kcalloc(dev->mode_config.num_crtc, sizeof(*state->crtcs), GFP_KERNEL); if (!state->crtcs) @@ -83,16 +81,6 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) sizeof(*state->plane_states), GFP_KERNEL); if (!state->plane_states) goto fail; - state->connectors = kcalloc(state->num_connector, - sizeof(*state->connectors), - GFP_KERNEL); - if (!state->connectors) - goto fail; - state->connector_states = kcalloc(state->num_connector, - sizeof(*state->connector_states), - GFP_KERNEL); - if (!state->connector_states) - goto fail; state->dev = dev; @@ -823,19 +811,27 @@ drm_atomic_get_connector_state(struct drm_atomic_state *state, index = drm_connector_index(connector); - /* - * Construction of atomic state updates can race with a connector - * hot-add which might overflow. In this case flip the table and just - * restart the entire ioctl - no one is fast enough to livelock a cpu - * with physical hotplug events anyway. - * - * Note that we only grab the indexes once we have the right lock to - * prevent hotplug/unplugging of connectors. So removal is no problem, - * at most the array is a bit too large. - */ if (index >= state->num_connector) { - DRM_DEBUG_ATOMIC("Hot-added connector would overflow state array, restarting\n"); - return ERR_PTR(-EAGAIN); + struct drm_connector **c; + struct drm_connector_state **cs; + int alloc = max(index + 1, config->num_connector); + + c = krealloc(state->connectors, alloc * sizeof(*state->connectors), GFP_KERNEL); + if (!c) + return ERR_PTR(-ENOMEM); + + state->connectors = c; + memset(&state->connectors[state->num_connector], 0, + sizeof(*state->connectors) * (alloc - state->num_connector)); + + cs = krealloc(state->connector_states, alloc * sizeof(*state->connector_states), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + state->connector_states = cs; + memset(&state->connector_states[state->num_connector], 0, + sizeof(*state->connector_states) * (alloc - state->num_connector)); + state->num_connector = alloc; } if (state->connector_states[index]) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 7c523060a076..4f2d3e161593 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1493,7 +1493,7 @@ void drm_atomic_helper_swap_state(struct drm_device *dev, { int i; - for (i = 0; i < dev->mode_config.num_connector; i++) { + for (i = 0; i < state->num_connector; i++) { struct drm_connector *connector = state->connectors[i]; if (!connector) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index d40bab29747e..f6191215b2cb 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -918,12 +918,19 @@ int drm_connector_init(struct drm_device *dev, connector->base.properties = &connector->properties; connector->dev = dev; connector->funcs = funcs; + + connector->connector_id = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL); + if (connector->connector_id < 0) { + ret = connector->connector_id; + goto out_put; + } + connector->connector_type = connector_type; connector->connector_type_id = ida_simple_get(connector_ida, 1, 0, GFP_KERNEL); if (connector->connector_type_id < 0) { ret = connector->connector_type_id; - goto out_put; + goto out_put_id; } connector->name = kasprintf(GFP_KERNEL, "%s-%d", @@ -931,7 +938,7 @@ int drm_connector_init(struct drm_device *dev, connector->connector_type_id); if (!connector->name) { ret = -ENOMEM; - goto out_put; + goto out_put_type_id; } INIT_LIST_HEAD(&connector->probed_modes); @@ -959,7 +966,12 @@ int drm_connector_init(struct drm_device *dev, } connector->debugfs_entry = NULL; - +out_put_type_id: + if (ret) + ida_remove(connector_ida, connector->connector_type_id); +out_put_id: + if (ret) + ida_remove(&config->connector_ida, connector->connector_id); out_put: if (ret) drm_mode_object_put(dev, &connector->base); @@ -996,6 +1008,9 @@ void drm_connector_cleanup(struct drm_connector *connector) ida_remove(&drm_connector_enum_list[connector->connector_type].ida, connector->connector_type_id); + ida_remove(&dev->mode_config.connector_ida, + connector->connector_id); + kfree(connector->display_info.bus_formats); drm_mode_object_put(dev, &connector->base); kfree(connector->name); @@ -1012,32 +1027,6 @@ void drm_connector_cleanup(struct drm_connector *connector) } EXPORT_SYMBOL(drm_connector_cleanup); -/** - * drm_connector_index - find the index of a registered connector - * @connector: connector to find index for - * - * Given a registered connector, return the index of that connector within a DRM - * device's list of connectors. - */ -unsigned int drm_connector_index(struct drm_connector *connector) -{ - unsigned int index = 0; - struct drm_connector *tmp; - struct drm_mode_config *config = &connector->dev->mode_config; - - WARN_ON(!drm_modeset_is_locked(&config->connection_mutex)); - - drm_for_each_connector(tmp, connector->dev) { - if (tmp == connector) - return index; - - index++; - } - - BUG(); -} -EXPORT_SYMBOL(drm_connector_index); - /** * drm_connector_register - register a connector * @connector: the connector to register @@ -5789,6 +5778,7 @@ void drm_mode_config_init(struct drm_device *dev) INIT_LIST_HEAD(&dev->mode_config.plane_list); idr_init(&dev->mode_config.crtc_idr); idr_init(&dev->mode_config.tile_idr); + ida_init(&dev->mode_config.connector_ida); drm_modeset_lock_all(dev); drm_mode_create_standard_properties(dev); @@ -5869,6 +5859,7 @@ void drm_mode_config_cleanup(struct drm_device *dev) crtc->funcs->destroy(crtc); } + ida_destroy(&dev->mode_config.connector_ida); idr_destroy(&dev->mode_config.tile_idr); idr_destroy(&dev->mode_config.crtc_idr); drm_modeset_lock_fini(&dev->mode_config.connection_mutex); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c65a212db77e..c5b4b81a831b 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1166,6 +1166,7 @@ struct drm_connector { struct drm_mode_object base; char *name; + int connector_id; int connector_type; int connector_type_id; bool interlace_allowed; @@ -2047,6 +2048,7 @@ struct drm_mode_config { struct list_head fb_list; int num_connector; + struct ida connector_ida; struct list_head connector_list; int num_encoder; struct list_head encoder_list; @@ -2200,7 +2202,11 @@ int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); extern void drm_connector_cleanup(struct drm_connector *connector); -extern unsigned int drm_connector_index(struct drm_connector *connector); +static inline unsigned drm_connector_index(struct drm_connector *connector) +{ + return connector->connector_id; +} + /* helper to unplug all connectors from sysfs for device */ extern void drm_connector_unplug_all(struct drm_device *dev); -- cgit v1.2.3 From cfdd28beb3205dbd1e91571516807199c8ab84ca Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 17 Feb 2016 18:00:31 +0100 Subject: net: make netdev_for_each_lower_dev safe for device removal When I used netdev_for_each_lower_dev in commit bad531623253 ("vrf: remove slave queue and private slave struct") I thought that it acts like netdev_for_each_lower_private and can be used to remove the current device from the list while walking, but unfortunately it acts more like netdev_for_each_lower_private_rcu and doesn't allow it. The difference is where the "iter" points to, right now it points to the current element and that makes it impossible to remove it. Change the logic to be similar to netdev_for_each_lower_private and make it point to the "next" element so we can safely delete the current one. VRF is the only such user right now, there's no change for the read-only users. Here's what can happen now: [98423.249858] general protection fault: 0000 [#1] SMP [98423.250175] Modules linked in: vrf bridge(O) stp llc nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace sunrpc crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ppdev aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd evdev serio_raw pcspkr virtio_balloon parport_pc parport i2c_piix4 i2c_core virtio_console acpi_cpufreq button 9pnet_virtio 9p 9pnet fscache ipv6 autofs4 ext4 crc16 mbcache jbd2 sg virtio_blk virtio_net sr_mod cdrom e1000 ata_generic ehci_pci uhci_hcd ehci_hcd usbcore usb_common virtio_pci ata_piix libata floppy virtio_ring virtio scsi_mod [last unloaded: bridge] [98423.255040] CPU: 1 PID: 14173 Comm: ip Tainted: G O 4.5.0-rc2+ #81 [98423.255386] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [98423.255777] task: ffff8800547f5540 ti: ffff88003428c000 task.ti: ffff88003428c000 [98423.256123] RIP: 0010:[] [] netdev_lower_get_next+0x1e/0x30 [98423.256534] RSP: 0018:ffff88003428f940 EFLAGS: 00010207 [98423.256766] RAX: 0002000100000004 RBX: ffff880054ff9000 RCX: 0000000000000000 [98423.257039] RDX: ffff88003428f8b8 RSI: ffff88003428f950 RDI: ffff880054ff90c0 [98423.257287] RBP: ffff88003428f940 R08: 0000000000000000 R09: 0000000000000000 [98423.257537] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88003428f9e0 [98423.257802] R13: ffff880054a5fd00 R14: ffff88003428f970 R15: 0000000000000001 [98423.258055] FS: 00007f3d76881700(0000) GS:ffff88005d000000(0000) knlGS:0000000000000000 [98423.258418] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [98423.258650] CR2: 00007ffe5951ffa8 CR3: 0000000052077000 CR4: 00000000000406e0 [98423.258902] Stack: [98423.259075] ffff88003428f960 ffffffffa0442636 0002000100000004 ffff880054ff9000 [98423.259647] ffff88003428f9b0 ffffffff81518205 ffff880054ff9000 ffff88003428f978 [98423.260208] ffff88003428f978 ffff88003428f9e0 ffff88003428f9e0 ffff880035b35f00 [98423.260739] Call Trace: [98423.260920] [] vrf_dev_uninit+0x76/0xa0 [vrf] [98423.261156] [] rollback_registered_many+0x205/0x390 [98423.261401] [] unregister_netdevice_many+0x1c/0x70 [98423.261641] [] rtnl_delete_link+0x3c/0x50 [98423.271557] [] rtnl_dellink+0xcb/0x1d0 [98423.271800] [] ? __inc_zone_state+0x4a/0x90 [98423.272049] [] rtnetlink_rcv_msg+0x84/0x200 [98423.272279] [] ? trace_hardirqs_on+0xd/0x10 [98423.272513] [] ? rtnetlink_rcv+0x1b/0x40 [98423.272755] [] ? rtnetlink_rcv+0x40/0x40 [98423.272983] [] netlink_rcv_skb+0x97/0xb0 [98423.273209] [] rtnetlink_rcv+0x2a/0x40 [98423.273476] [] netlink_unicast+0x11b/0x1a0 [98423.273710] [] netlink_sendmsg+0x3e1/0x610 [98423.273947] [] sock_sendmsg+0x38/0x70 [98423.274175] [] ___sys_sendmsg+0x2e3/0x2f0 [98423.274416] [] ? do_raw_spin_unlock+0xbe/0x140 [98423.274658] [] ? handle_mm_fault+0x26c/0x2210 [98423.274894] [] ? handle_mm_fault+0x4d/0x2210 [98423.275130] [] ? __fget_light+0x91/0xb0 [98423.275365] [] __sys_sendmsg+0x42/0x80 [98423.275595] [] SyS_sendmsg+0x12/0x20 [98423.275827] [] entry_SYSCALL_64_fastpath+0x16/0x7a [98423.276073] Code: c3 31 c0 5d c3 0f 1f 84 00 00 00 00 00 66 66 66 66 90 48 8b 06 55 48 81 c7 c0 00 00 00 48 89 e5 48 8b 00 48 39 f8 74 09 48 89 06 <48> 8b 40 e8 5d c3 31 c0 5d c3 0f 1f 84 00 00 00 00 00 66 66 66 [98423.279639] RIP [] netdev_lower_get_next+0x1e/0x30 [98423.279920] RSP CC: David Ahern CC: David S. Miller CC: Roopa Prabhu CC: Vlad Yasevich Fixes: bad531623253 ("vrf: remove slave queue and private slave struct") Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289c2314d766..5440b7b705eb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3718,7 +3718,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); #define netdev_for_each_lower_dev(dev, ldev, iter) \ - for (iter = &(dev)->adj_list.lower, \ + for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) diff --git a/net/core/dev.c b/net/core/dev.c index e15e6e6a7a8a..0ef061b2badc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5379,12 +5379,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *lower; - lower = list_entry((*iter)->next, struct netdev_adjacent, list); + lower = list_entry(*iter, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; - *iter = &lower->list; + *iter = lower->list.next; return lower->dev; } -- cgit v1.2.3 From b1cb86ae0e5951e9747ec7a5b33d1c1217791b75 Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Fri, 19 Feb 2016 12:58:43 -0500 Subject: soc: ti: knav_dma: rename pad in struct knav_dma_desc to sw_data Rename the pad to sw_data as per description of this field in the hardware spec(refer sprugr9 from www.ti.com). Latest version of the document is at http://www.ti.com/lit/ug/sprugr9h/sprugr9h.pdf and section 3.1 Host Packet Descriptor describes this field. Define and use a constant for the size of sw_data field similar to other fields in the struct for desc and document the sw_data field in the header. As the sw_data is not touched by hw, it's type can be changed to u32. Rename the helpers to match with the updated dma desc field sw_data. Cc: Wingman Kwok Cc: Mugunthan V N CC: Arnd Bergmann CC: Grygorii Strashko CC: David Laight Signed-off-by: Murali Karicheri Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 40 +++++++++++++++++++----------------- include/linux/soc/ti/knav_dma.h | 4 +++- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 0b26e5209413..84bab29995fd 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -117,10 +117,11 @@ static void get_pkt_info(dma_addr_t *buff, u32 *buff_len, dma_addr_t *ndesc, *ndesc = le32_to_cpu(desc->next_desc); } -static void get_pad_info(u32 *pad0, u32 *pad1, struct knav_dma_desc *desc) +static void get_sw_data(u32 *data0, u32 *data1, struct knav_dma_desc *desc) { - *pad0 = le32_to_cpu(desc->pad[0]); - *pad1 = le32_to_cpu(desc->pad[1]); + /* No Endian conversion needed as this data is untouched by hw */ + *data0 = desc->sw_data[0]; + *data1 = desc->sw_data[1]; } static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len, @@ -153,10 +154,11 @@ static void set_desc_info(u32 desc_info, u32 pkt_info, desc->packet_info = cpu_to_le32(pkt_info); } -static void set_pad_info(u32 pad0, u32 pad1, struct knav_dma_desc *desc) +static void set_sw_data(u32 data0, u32 data1, struct knav_dma_desc *desc) { - desc->pad[0] = cpu_to_le32(pad0); - desc->pad[1] = cpu_to_le32(pad1); + /* No Endian conversion needed as this data is untouched by hw */ + desc->sw_data[0] = data0; + desc->sw_data[1] = data1; } static void set_org_pkt_info(dma_addr_t buff, u32 buff_len, @@ -581,12 +583,12 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp, break; } get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc); - get_pad_info((u32 *)&buf_ptr, &buf_len, ndesc); + get_sw_data((u32 *)&buf_ptr, &buf_len, ndesc); dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE); __free_page(buf_ptr); knav_pool_desc_put(netcp->rx_pool, desc); } - get_pad_info((u32 *)&buf_ptr, &buf_len, desc); + get_sw_data((u32 *)&buf_ptr, &buf_len, desc); if (buf_ptr) netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr); @@ -639,7 +641,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc); - get_pad_info((u32 *)&org_buf_ptr, &org_buf_len, desc); + get_sw_data((u32 *)&org_buf_ptr, &org_buf_len, desc); if (unlikely(!org_buf_ptr)) { dev_err(netcp->ndev_dev, "NULL bufptr in desc\n"); @@ -672,7 +674,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc); - get_pad_info((u32 *)&page, &tmp, ndesc); + get_sw_data((u32 *)&page, &tmp, ndesc); if (likely(dma_buff && buf_len && page)) { dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE, @@ -761,7 +763,7 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq) } get_org_pkt_info(&dma, &buf_len, desc); - get_pad_info((u32 *)&buf_ptr, &tmp, desc); + get_sw_data((u32 *)&buf_ptr, &tmp, desc); if (unlikely(!dma)) { dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n"); @@ -813,7 +815,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) struct page *page; dma_addr_t dma; void *bufptr; - u32 pad[2]; + u32 sw_data[2]; /* Allocate descriptor */ hwdesc = knav_pool_desc_get(netcp->rx_pool); @@ -830,7 +832,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); bufptr = netdev_alloc_frag(primary_buf_len); - pad[1] = primary_buf_len; + sw_data[1] = primary_buf_len; if (unlikely(!bufptr)) { dev_warn_ratelimited(netcp->ndev_dev, @@ -842,7 +844,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) if (unlikely(dma_mapping_error(netcp->dev, dma))) goto fail; - pad[0] = (u32)bufptr; + sw_data[0] = (u32)bufptr; } else { /* Allocate a secondary receive queue entry */ page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD); @@ -852,8 +854,8 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) } buf_len = PAGE_SIZE; dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE); - pad[0] = (u32)page; - pad[1] = 0; + sw_data[0] = (u32)page; + sw_data[1] = 0; } desc_info = KNAV_DMA_DESC_PS_INFO_IN_DESC; @@ -863,7 +865,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) << KNAV_DMA_DESC_RETQ_SHIFT; set_org_pkt_info(dma, buf_len, hwdesc); - set_pad_info(pad[0], pad[1], hwdesc); + set_sw_data(sw_data[0], sw_data[1], hwdesc); set_desc_info(desc_info, pkt_info, hwdesc); /* Push to FDQs */ @@ -969,7 +971,7 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp, continue; } - get_pad_info((u32 *)&skb, &tmp, desc); + get_sw_data((u32 *)&skb, &tmp, desc); netcp_free_tx_desc_chain(netcp, desc, dma_sz); if (!skb) { dev_err(netcp->ndev_dev, "No skb in Tx desc\n"); @@ -1174,7 +1176,7 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp, } set_words(&tmp, 1, &desc->packet_info); - set_words((u32 *)&skb, 1, &desc->pad[0]); + set_sw_data((u32)skb, 0, desc); if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) { tmp = tx_pipe->switch_to_port; diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 343c13ac4f71..35cb9264e0d5 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -44,6 +44,7 @@ #define KNAV_DMA_NUM_EPIB_WORDS 4 #define KNAV_DMA_NUM_PS_WORDS 16 +#define KNAV_DMA_NUM_SW_DATA_WORDS 4 #define KNAV_DMA_FDQ_PER_CHAN 4 /* Tx channel scheduling priority */ @@ -142,6 +143,7 @@ struct knav_dma_cfg { * @orig_buff: buff pointer since 'buff' can be overwritten * @epib: Extended packet info block * @psdata: Protocol specific + * @sw_data: Software private data not touched by h/w */ struct knav_dma_desc { __le32 desc_info; @@ -154,7 +156,7 @@ struct knav_dma_desc { __le32 orig_buff; __le32 epib[KNAV_DMA_NUM_EPIB_WORDS]; __le32 psdata[KNAV_DMA_NUM_PS_WORDS]; - __le32 pad[4]; + u32 sw_data[KNAV_DMA_NUM_SW_DATA_WORDS]; } ____cacheline_aligned; #if IS_ENABLED(CONFIG_KEYSTONE_NAVIGATOR_DMA) -- cgit v1.2.3