summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/common/habanalabs.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/habanalabs.h')
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h209
1 files changed, 146 insertions, 63 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index cb710fd478b6..1edaf6ab67bd 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
- * Copyright 2016-2021 HabanaLabs, Ltd.
+ * Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -31,14 +31,15 @@
#define HL_NAME "habanalabs"
/* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:61] - Encode mmap type
+ * bits[63:59] - Encode mmap type
* bits[45:0] - mmap offset value
*
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
* defines are w.r.t to PAGE_SIZE
*/
-#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK (0x1full << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_TS_BUFF (0x10ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)
@@ -141,6 +142,9 @@ enum hl_mmu_page_table_location {
*
* - HL_DRV_RESET_FW_FATAL_ERR
* Set if reset is due to a fatal error from FW
+ *
+ * - HL_DRV_RESET_DELAY
+ * Set if a delay should be added before the reset
*/
#define HL_DRV_RESET_HARD (1 << 0)
@@ -150,6 +154,7 @@ enum hl_mmu_page_table_location {
#define HL_DRV_RESET_DEV_RELEASE (1 << 4)
#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
+#define HL_DRV_RESET_DELAY (1 << 7)
#define HL_MAX_SOBS_PER_MONITOR 8
@@ -402,8 +407,11 @@ enum hl_device_hw_state {
* @hop4_mask: mask to get the PTE address in hop 4.
* @hop5_mask: mask to get the PTE address in hop 5.
* @last_mask: mask to get the bit indicating this is the last hop.
+ * @pgt_size: size for page tables.
* @page_size: default page size used to allocate memory.
* @num_hops: The amount of hops supported by the translation table.
+ * @hop_table_size: HOP table size.
+ * @hop0_tables_total_size: total size for all HOP0 tables.
* @host_resident: Should the MMU page table reside in host memory or in the
* device DRAM.
*/
@@ -423,8 +431,11 @@ struct hl_mmu_properties {
u64 hop4_mask;
u64 hop5_mask;
u64 last_mask;
+ u64 pgt_size;
u32 page_size;
u32 num_hops;
+ u32 hop_table_size;
+ u32 hop0_tables_total_size;
u8 host_resident;
};
@@ -554,6 +565,9 @@ struct hl_hints_range {
* use-case of doing soft-reset in training (due
* to the fact that training runs on multiple
* devices)
+ * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
+ * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
+ * @supports_user_set_page_size: true if user can set the allocation page size.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -637,6 +651,9 @@ struct asic_fixed_properties {
u8 use_get_power_for_reset_history;
u8 supports_soft_reset;
u8 allow_inference_soft_reset;
+ u8 configurable_stop_on_err;
+ u8 set_max_power_on_device_init;
+ u8 supports_user_set_page_size;
};
/**
@@ -704,6 +721,40 @@ struct hl_cb_mgr {
};
/**
+ * struct hl_ts_mgr - describes the timestamp registration memory manager.
+ * @ts_lock: protects ts_handles.
+ * @ts_handles: an idr to hold all ts bufferes handles.
+ */
+struct hl_ts_mgr {
+ spinlock_t ts_lock;
+ struct idr ts_handles;
+};
+
+/**
+ * struct hl_ts_buff - describes a timestamp buffer.
+ * @refcount: reference counter for usage of the buffer.
+ * @hdev: pointer to device this buffer belongs to.
+ * @mmap: true if the buff is currently mapped to user.
+ * @kernel_buff_address: Holds the internal buffer's kernel virtual address.
+ * @user_buff_address: Holds the user buffer's kernel virtual address.
+ * @id: the buffer ID.
+ * @mmap_size: Holds the buffer size that was mmaped.
+ * @kernel_buff_size: Holds the internal kernel buffer size.
+ * @user_buff_size: Holds the user buffer size.
+ */
+struct hl_ts_buff {
+ struct kref refcount;
+ struct hl_device *hdev;
+ atomic_t mmap;
+ void *kernel_buff_address;
+ void *user_buff_address;
+ u32 id;
+ u32 mmap_size;
+ u32 kernel_buff_size;
+ u32 user_buff_size;
+};
+
+/**
* struct hl_cb - describes a Command Buffer.
* @refcount: reference counter for usage of the CB.
* @hdev: pointer to device this CB belongs to.
@@ -881,8 +932,53 @@ struct hl_user_interrupt {
};
/**
+ * struct timestamp_reg_free_node - holds the timestamp registration free objects node
+ * @free_objects_node: node in the list free_obj_jobs
+ * @cq_cb: pointer to cq command buffer to be freed
+ * @ts_buff: pointer to timestamp buffer to be freed
+ */
+struct timestamp_reg_free_node {
+ struct list_head free_objects_node;
+ struct hl_cb *cq_cb;
+ struct hl_ts_buff *ts_buff;
+};
+
+/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
+ * the job will be to pass over the free_obj_jobs list and put refcount to objects
+ * in each node of the list
+ * @free_obj: workqueue object to free timestamp registration node objects
+ * @hdev: pointer to the device structure
+ * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
+ */
+struct timestamp_reg_work_obj {
+ struct work_struct free_obj;
+ struct hl_device *hdev;
+ struct list_head *free_obj_head;
+};
+
+/* struct timestamp_reg_info - holds the timestamp registration related data.
+ * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
+ * relevant only when doing timestamps records registration.
+ * @cq_cb: pointer to CQ counter CB.
+ * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
+ * relevant only when doing timestamps records
+ * registration.
+ * @in_use: indicates if the node already in use. relevant only when doing
+ * timestamps records registration, since in this case the driver
+ * will have it's own buffer which serve as a records pool instead of
+ * allocating records dynamically.
+ */
+struct timestamp_reg_info {
+ struct hl_ts_buff *ts_buff;
+ struct hl_cb *cq_cb;
+ u64 *timestamp_kernel_addr;
+ u8 in_use;
+};
+
+/**
* struct hl_user_pending_interrupt - holds a context to a user thread
* pending on an interrupt
+ * @ts_reg_info: holds the timestamps registration nodes info
* @wait_list_node: node in the list of user threads pending on an interrupt
* @fence: hl fence object for interrupt completion
* @cq_target_value: CQ target value
@@ -890,10 +986,11 @@ struct hl_user_interrupt {
* handler for taget value comparison
*/
struct hl_user_pending_interrupt {
- struct list_head wait_list_node;
- struct hl_fence fence;
- u64 cq_target_value;
- u64 *cq_kernel_addr;
+ struct timestamp_reg_info ts_reg_info;
+ struct list_head wait_list_node;
+ struct hl_fence fence;
+ u64 cq_target_value;
+ u64 *cq_kernel_addr;
};
/**
@@ -1155,7 +1252,6 @@ struct fw_load_mgr {
* internal memory via DMA engine.
* @add_device_attr: add ASIC specific device attributes.
* @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
- * @set_pll_profile: change PLL profile (manual/automatic).
* @get_events_stat: retrieve event queue entries histogram.
* @read_pte: read MMU page table entry from DRAM.
* @write_pte: write MMU page table entry to DRAM.
@@ -1164,9 +1260,6 @@ struct fw_load_mgr {
* @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
* ASID-VA-size mask.
* @send_heartbeat: send is-alive packet to CPU-CP and verify response.
- * @set_clock_gating: enable/disable clock gating per engine according to
- * clock gating mask in hdev
- * @disable_clock_gating: disable clock gating completely
* @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
* @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
@@ -1187,7 +1280,6 @@ struct fw_load_mgr {
* @halt_coresight: stop the ETF and ETR traces.
* @ctx_init: context dependent initialization.
* @ctx_fini: context dependent cleanup.
- * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @load_firmware_to_device: load the firmware to the device's memory
* @load_boot_fit_to_device: load boot fit to device's memory
@@ -1225,6 +1317,8 @@ struct fw_load_mgr {
* @get_sob_addr: get SOB base address offset.
* @set_pci_memory_regions: setting properties of PCI memory regions
* @get_stream_master_qid_arr: get pointer to stream masters QID array
+ * @is_valid_dram_page_size: return true if page size is supported in device
+ * memory allocation, otherwise false.
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1285,12 +1379,10 @@ struct hl_asic_funcs {
bool user_address, u64 val);
int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
void *blob_addr);
- void (*add_device_attr)(struct hl_device *hdev,
- struct attribute_group *dev_attr_grp);
+ void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
+ struct attribute_group *dev_vrm_attr_grp);
void (*handle_eqe)(struct hl_device *hdev,
struct hl_eq_entry *eq_entry);
- void (*set_pll_profile)(struct hl_device *hdev,
- enum hl_pll_frequency freq);
void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
u32 *size);
u64 (*read_pte)(struct hl_device *hdev, u64 addr);
@@ -1300,8 +1392,6 @@ struct hl_asic_funcs {
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
- void (*set_clock_gating)(struct hl_device *hdev);
- void (*disable_clock_gating)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s);
@@ -1320,7 +1410,6 @@ struct hl_asic_funcs {
void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx);
int (*ctx_init)(struct hl_ctx *ctx);
void (*ctx_fini)(struct hl_ctx *ctx);
- int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev);
@@ -1355,6 +1444,7 @@ struct hl_asic_funcs {
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
void (*set_pci_memory_regions)(struct hl_device *hdev);
u32* (*get_stream_master_qid_arr)(void);
+ bool (*is_valid_dram_page_size)(u32 page_size);
};
@@ -1742,6 +1832,8 @@ struct hl_vm_hw_block_list_node {
* @pages: the physical page array.
* @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list.
+ * @node: used to attach to deletion list that is used when all the allocations are cleared
+ * at the teardown of the context.
* @mapping_cnt: number of shared mappings.
* @exporting_cnt: number of dma-buf exporting.
* @asid: the context related to this list.
@@ -1757,6 +1849,7 @@ struct hl_vm_phys_pg_pack {
u64 *pages;
u64 npages;
u64 total_size;
+ struct list_head node;
atomic_t mapping_cnt;
u32 exporting_cnt;
u32 asid;
@@ -1834,6 +1927,7 @@ struct hl_debug_params {
* @ctx: current executing context. TODO: remove for multiple ctx per process
* @ctx_mgr: context manager to handle multiple context for this FD.
* @cb_mgr: command buffer manager to handle multiple buffers for this FD.
+ * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
* @debugfs_list: list of relevant ASIC debugfs.
* @dev_node: node in the device list of file private data
* @refcount: number of related contexts.
@@ -1846,6 +1940,7 @@ struct hl_fpriv {
struct hl_ctx *ctx;
struct hl_ctx_mgr ctx_mgr;
struct hl_cb_mgr cb_mgr;
+ struct hl_ts_mgr ts_mem_mgr;
struct list_head debugfs_list;
struct list_head dev_node;
struct kref refcount;
@@ -2518,7 +2613,7 @@ struct hl_reset_info {
* @cq_wq: work queues of completion queues for executing work in process
* context.
* @eq_wq: work queue of event queue for executing work in process context.
- * @sob_reset_wq: work queue for sob reset executions.
+ * @ts_free_obj_wq: work queue for timestamp registration objects release.
* @kernel_ctx: Kernel driver context structure.
* @kernel_queues: array of hl_hw_queue.
* @cs_mirror_list: CS mirror list for TDR.
@@ -2569,9 +2664,6 @@ struct hl_reset_info {
* @max_power: the max power of the device, as configured by the sysadmin. This
* value is saved so in case of hard-reset, the driver will restore
* this value and update the F/W after the re-initialization
- * @clock_gating_mask: is clock gating enabled. bitmask that represents the
- * different engines. See debugfs-driver-habanalabs for
- * details.
* @boot_error_status_mask: contains a mask of the device boot error status.
* Each bit represents a different error, according to
* the defines in hl_boot_if.h. If the bit is cleared,
@@ -2611,8 +2703,6 @@ struct hl_reset_info {
* @in_debug: whether the device is in a state where the profiling/tracing infrastructure
* can be used. This indication is needed because in some ASICs we need to do
* specific operations to enable that infrastructure.
- * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
- * only to POWER9 machines.
* @cdev_sysfs_created: were char devices and sysfs nodes created.
* @stop_on_err: true if engines should stop on error.
* @supports_sync_stream: is sync stream supported.
@@ -2651,7 +2741,7 @@ struct hl_device {
struct hl_user_interrupt common_user_interrupt;
struct workqueue_struct **cq_wq;
struct workqueue_struct *eq_wq;
- struct workqueue_struct *sob_reset_wq;
+ struct workqueue_struct *ts_free_obj_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
struct list_head cs_mirror_list;
@@ -2710,7 +2800,6 @@ struct hl_device {
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
- u64 clock_gating_mask;
u64 boot_error_status_mask;
u64 dram_pci_bar_start;
u64 last_successful_open_jif;
@@ -2736,7 +2825,6 @@ struct hl_device {
u8 device_cpu_disabled;
u8 dma_mask;
u8 in_debug;
- u8 power9_64bit_dma_enable;
u8 cdev_sysfs_created;
u8 stop_on_err;
u8 supports_sync_stream;
@@ -2970,7 +3058,7 @@ int hl_cb_pool_fini(struct hl_device *hdev);
int hl_cb_va_pool_init(struct hl_ctx *ctx);
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
-void hl_cs_rollback_all(struct hl_device *hdev);
+void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
@@ -3024,6 +3112,9 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size);
+u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
+u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+ u8 hop_idx, u64 hop_addr, u64 virt_addr);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
@@ -3094,39 +3185,26 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
- bool curr);
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
- u64 freq);
-int hl_get_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_set_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-int hl_get_voltage(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_current(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_fan_speed(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_pwm_info(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
- long value);
-u64 hl_get_max_power(struct hl_device *hdev);
-void hl_set_max_power(struct hl_device *hdev);
-int hl_set_voltage(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-int hl_set_current(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-int hl_set_power(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-int hl_get_power(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_clk_rate(struct hl_device *hdev,
- u32 *cur_clk, u32 *max_clk);
-void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
-void hl_add_device_attr(struct hl_device *hdev,
- struct attribute_group *dev_attr_grp);
+long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
+void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
+int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+long hl_fw_get_max_power(struct hl_device *hdev);
+void hl_fw_set_max_power(struct hl_device *hdev);
+int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
+int hl_get_power(struct hl_device *hdev, int sensor_index, u32 attr, long *value);
+int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+void hl_fw_set_pll_profile(struct hl_device *hdev);
+void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp);
+void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp);
+
void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_encaps_handle_do_release(struct kref *ref);
@@ -3146,6 +3224,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
const char *format, ...);
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
+void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
+void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
+int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
+void hl_ts_put(struct hl_ts_buff *buff);
#ifdef CONFIG_DEBUG_FS