diff options
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/habanalabs/common/context.c | 8 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/device.c | 56 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 7 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_drv.c | 9 |
4 files changed, 50 insertions, 30 deletions
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 49e6f1172d18..c6360e33bce8 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -283,11 +283,9 @@ struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev) /* There can only be a single user which has opened the compute device, so exit * immediately once we find him */ - if (!hpriv->is_control) { - ctx = hpriv->ctx; - hl_ctx_get(hdev, ctx); - break; - } + ctx = hpriv->ctx; + hl_ctx_get(hdev, ctx); + break; } mutex_unlock(&hdev->fpriv_list_lock); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index bea05a59425f..f1f482c5cdcb 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -169,9 +169,9 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) goto out; } - mutex_lock(&hdev->fpriv_list_lock); + mutex_lock(&hdev->fpriv_ctrl_list_lock); list_del(&hpriv->dev_node); - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); out: put_pid(hpriv->taskpid); @@ -449,7 +449,9 @@ static int device_early_init(struct hl_device *hdev) INIT_LIST_HEAD(&hdev->cs_mirror_list); spin_lock_init(&hdev->cs_mirror_lock); INIT_LIST_HEAD(&hdev->fpriv_list); + INIT_LIST_HEAD(&hdev->fpriv_ctrl_list); mutex_init(&hdev->fpriv_list_lock); + mutex_init(&hdev->fpriv_ctrl_list_lock); atomic_set(&hdev->in_reset, 0); mutex_init(&hdev->clk_throttling.lock); @@ -491,6 +493,7 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->send_cpu_message_lock); mutex_destroy(&hdev->fpriv_list_lock); + mutex_destroy(&hdev->fpriv_ctrl_list_lock); mutex_destroy(&hdev->clk_throttling.lock); @@ -678,6 +681,8 @@ static void take_release_locks(struct hl_device *hdev) /* Flush anyone that is inside device open */ mutex_lock(&hdev->fpriv_list_lock); mutex_unlock(&hdev->fpriv_list_lock); + mutex_lock(&hdev->fpriv_ctrl_list_lock); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); } static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) @@ -789,17 +794,21 @@ disable_device: return rc; } -static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) +static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) { - struct hl_fpriv *hpriv; struct task_struct *task = NULL; + struct list_head *fd_list; + struct hl_fpriv *hpriv; + struct mutex *fd_lock; u32 pending_cnt; + fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; /* Giving time for user to close FD, and for processes that are inside * hl_device_open to finish */ - if (!list_empty(&hdev->fpriv_list)) + if (!list_empty(fd_list)) ssleep(1); if (timeout) { @@ -815,12 +824,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) } } - mutex_lock(&hdev->fpriv_list_lock); + mutex_lock(fd_lock); /* This section must be protected because we are dereferencing * pointers that are freed if the process exits */ - list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { + list_for_each_entry(hpriv, fd_list, dev_node) { task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); if (task) { dev_info(hdev->dev, "Killing user process pid=%d\n", @@ -832,12 +841,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) } else { dev_warn(hdev->dev, "Can't get task struct for PID so giving up on killing process\n"); - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(fd_lock); return -ETIME; } } - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(fd_lock); /* * We killed the open users, but that doesn't mean they are closed. @@ -849,7 +858,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) */ wait_for_processes: - while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { + while ((!list_empty(fd_list)) && (pending_cnt)) { dev_dbg(hdev->dev, "Waiting for all unmap operations to finish before hard reset\n"); @@ -859,7 +868,7 @@ wait_for_processes: } /* All processes exited successfully */ - if (list_empty(&hdev->fpriv_list)) + if (list_empty(fd_list)) return 0; /* Give up waiting for processes to exit */ @@ -871,14 +880,19 @@ wait_for_processes: return -EBUSY; } -static void device_disable_open_processes(struct hl_device *hdev) +static void device_disable_open_processes(struct hl_device *hdev, bool control_dev) { + struct list_head *fd_list; struct hl_fpriv *hpriv; + struct mutex *fd_lock; - mutex_lock(&hdev->fpriv_list_lock); - list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; + + mutex_lock(fd_lock); + list_for_each_entry(hpriv, fd_list, dev_node) hpriv->hdev = NULL; - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(fd_lock); } static void handle_reset_trigger(struct hl_device *hdev, u32 flags) @@ -1057,7 +1071,7 @@ kill_processes: * process can't really exit until all its CSs are done, which * is what we do in cs rollback */ - rc = device_kill_open_processes(hdev, 0); + rc = device_kill_open_processes(hdev, 0, false); if (rc == -EBUSY) { if (hdev->device_fini_pending) { @@ -1629,10 +1643,16 @@ void hl_device_fini(struct hl_device *hdev) "Waiting for all processes to exit (timeout of %u seconds)", HL_PENDING_RESET_LONG_SEC); - rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); + rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false); if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes\n"); - device_disable_open_processes(hdev); + device_disable_open_processes(hdev, false); + } + + rc = device_kill_open_processes(hdev, 0, true); + if (rc) { + dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); + device_disable_open_processes(hdev, true); } hl_cb_pool_fini(hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 362eee3f028c..015aa1ee8ce0 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1824,7 +1824,6 @@ struct hl_debug_params { * @dev_node: node in the device list of file private data * @refcount: number of related contexts. * @restore_phase_mutex: lock for context switch and restore phase. - * @is_control: true for control device, false otherwise */ struct hl_fpriv { struct hl_device *hdev; @@ -1837,7 +1836,6 @@ struct hl_fpriv { struct list_head dev_node; struct kref refcount; struct mutex restore_phase_mutex; - u8 is_control; }; @@ -2502,7 +2500,10 @@ struct last_error_session_info { * @internal_cb_va_base: internal cb pool mmu virtual address base * @fpriv_list: list of file private data structures. Each structure is created * when a user opens the device + * @fpriv_ctrl_list: list of file private data structures. Each structure is created + * when a user opens the control device * @fpriv_list_lock: protects the fpriv_list + * @fpriv_ctrl_list_lock: protects the fpriv_ctrl_list * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. @@ -2655,7 +2656,9 @@ struct hl_device { u64 internal_cb_va_base; struct list_head fpriv_list; + struct list_head fpriv_ctrl_list; struct mutex fpriv_list_lock; + struct mutex fpriv_ctrl_list_lock; struct hl_cs_counters_atomic aggregated_cs_counters; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index d59201f93de9..aa4e07b1f839 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -220,12 +220,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) hpriv->hdev = hdev; filp->private_data = hpriv; hpriv->filp = filp; - hpriv->is_control = true; nonseekable_open(inode, filp); hpriv->taskpid = find_get_pid(current->pid); - mutex_lock(&hdev->fpriv_list_lock); + mutex_lock(&hdev->fpriv_ctrl_list_lock); if (!hl_device_operational(hdev, NULL)) { dev_err_ratelimited(hdev->dev_ctrl, @@ -235,13 +234,13 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) goto out_err; } - list_add(&hpriv->dev_node, &hdev->fpriv_list); - mutex_unlock(&hdev->fpriv_list_lock); + list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); return 0; out_err: - mutex_unlock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_ctrl_list_lock); filp->private_data = NULL; put_pid(hpriv->taskpid); |