From 25e7aeba601c1776cd21d610e3afc8768d0c7f2e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 31 Mar 2020 22:46:36 +0300 Subject: habanalabs: Add INFO IOCTL opcode for time sync information Add a new opcode to the INFO IOCTL that retrieves the device time alongside the host time, to allow a user application that want to measure device time together with host time (such as a profiler) to synchronize these times. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 4faa2c9767e5..4d593050c42b 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -101,6 +101,8 @@ enum hl_device_status { * HL_INFO_RESET_COUNT - Retrieve the counts of the soft and hard reset * operations performed on the device since the last * time the driver was loaded. + * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time + * for synchronization. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -111,6 +113,7 @@ enum hl_device_status { #define HL_INFO_HW_EVENTS_AGGREGATE 7 #define HL_INFO_CLK_RATE 8 #define HL_INFO_RESET_COUNT 9 +#define HL_INFO_TIME_SYNC 10 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -169,6 +172,11 @@ struct hl_info_reset_count { __u32 soft_reset_cnt; }; +struct hl_info_time_sync { + __u64 device_time; + __u64 host_time; +}; + struct hl_info_args { /* Location of relevant struct in userspace */ __u64 return_pointer; -- cgit v1.2.3 From 39b425170d35ff0841084007423f1b82f3b3e5ac Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 17 Apr 2020 12:12:13 +0300 Subject: habanalabs: leave space for 2xMSG_PROT in CB The user must leave space for 2xMSG_PROT in the external CB, so adjust the define of max size accordingly. The driver, however, can still create a CB with the maximum size of 2MB. Therefore, we need to add a check specifically for the user requested size. Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_buffer.c | 24 +++++++++++++++++------- include/uapi/misc/habanalabs.h | 3 ++- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c index 53fddbd8e693..6cb92efce4d9 100644 --- a/drivers/misc/habanalabs/command_buffer.c +++ b/drivers/misc/habanalabs/command_buffer.c @@ -105,10 +105,9 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, goto out_err; } - if (cb_size > HL_MAX_CB_SIZE) { - dev_err(hdev->dev, - "CB size %d must be less then %d\n", - cb_size, HL_MAX_CB_SIZE); + if (cb_size > SZ_2M) { + dev_err(hdev->dev, "CB size %d must be less than %d\n", + cb_size, SZ_2M); rc = -EINVAL; goto out_err; } @@ -211,7 +210,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cb_args *args = data; struct hl_device *hdev = hpriv->hdev; - u64 handle; + u64 handle = 0; int rc; if (hl_device_disabled_or_in_reset(hdev)) { @@ -223,15 +222,26 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) switch (args->in.op) { case HL_CB_OP_CREATE: - rc = hl_cb_create(hdev, &hpriv->cb_mgr, args->in.cb_size, - &handle, hpriv->ctx->asid); + if (args->in.cb_size > HL_MAX_CB_SIZE) { + dev_err(hdev->dev, + "User requested CB size %d must be less than %d\n", + args->in.cb_size, HL_MAX_CB_SIZE); + rc = -EINVAL; + } else { + rc = hl_cb_create(hdev, &hpriv->cb_mgr, + args->in.cb_size, &handle, + hpriv->ctx->asid); + } + memset(args, 0, sizeof(*args)); args->out.cb_handle = handle; break; + case HL_CB_OP_DESTROY: rc = hl_cb_destroy(hdev, &hpriv->cb_mgr, args->in.cb_handle); break; + default: rc = -ENOTTY; break; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 4d593050c42b..523e511e6cff 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -209,7 +209,8 @@ struct hl_info_args { /* Opcode to destroy previously created command buffer */ #define HL_CB_OP_DESTROY 1 -#define HL_MAX_CB_SIZE 0x200000 /* 2MB */ +/* 2MB minus 32 bytes for 2xMSG_PROT */ +#define HL_MAX_CB_SIZE (0x200000 - 32) struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ -- cgit v1.2.3 From f9e5f29518c1821d794bb7ec7e7c91650f4ded14 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 7 May 2020 13:41:16 +0300 Subject: uapi: habanalabs: add signal/wait operations This is a pre-requisite to upstreaming GAUDI support. Signal/wait operations are done by the user to perform sync between two Primary Queues (PQs). The sync is done using the sync manager and it is usually resolved inside the device, but sometimes it can be resolved in the host, i.e. the user should be able to wait in the host until a signal has been completed. The mechanism to define signal and wait operations is done by the driver because it needs atomicity and serialization, which is already done in the driver when submitting work to the different queues. To implement this feature, the driver "takes" a couple of h/w resources, and this is reflected by the defines added to the uapi file. The signal/wait operations are done via the existing CS IOCTL, and they use the same data structure. There is a difference in the meaning of some of the parameters, and for that we added unions to make the code more readable. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 13 +++++- drivers/misc/habanalabs/habanalabs.h | 2 + include/uapi/misc/habanalabs.h | 67 +++++++++++++++++++++------- 3 files changed, 65 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 6680e183d881..f7d03a35e6a8 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -11,6 +11,8 @@ #include #include +#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT) + static void job_wq_completion(struct work_struct *work); static long _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq); @@ -659,7 +661,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) union hl_cs_args *args = data; struct hl_ctx *ctx = hpriv->ctx; void __user *chunks_execute, *chunks_restore; - u32 num_chunks_execute, num_chunks_restore; + u32 num_chunks_execute, num_chunks_restore, sig_wait_flags; u64 cs_seq = ULONG_MAX; int rc, do_ctx_switch; bool need_soft_reset = false; @@ -672,6 +674,15 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) goto out; } + sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT; + + if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) && + (!hdev->supports_sync_stream))) { + dev_err(hdev->dev, "Sync stream CS is not supported\n"); + rc = -EINVAL; + goto out; + } + chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; num_chunks_execute = args->in.num_chunks_execute; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index b1dc6a22ba0d..7cd9a8d72451 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1347,6 +1347,7 @@ struct hl_device_idle_busy_ts { * only to POWER9 machines. * @cdev_sysfs_created: were char devices and sysfs nodes created. * @stop_on_err: true if engines should stop on error. + * @supports_sync_stream: is sync stream supported. */ struct hl_device { struct pci_dev *pdev; @@ -1429,6 +1430,7 @@ struct hl_device { u8 power9_64bit_dma_enable; u8 cdev_sysfs_created; u8 stop_on_err; + u8 supports_sync_stream; /* Parameters for bring-up */ u8 mmu_enable; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 523e511e6cff..70dfccea7038 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note * - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2020 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -241,52 +241,87 @@ union hl_cb_args { * compatibility */ struct hl_cs_chunk { - /* - * For external queue, this represents a Handle of CB on the Host - * For internal queue, this represents an SRAM or DRAM address of the - * internal CB - */ - __u64 cb_handle; + union { + /* For external queue, this represents a Handle of CB on the + * Host. + * For internal queue in Goya, this represents an SRAM or + * a DRAM address of the internal CB. In Gaudi, this might also + * represent a mapped host address of the CB. + * + * A mapped host address is in the device address space, after + * a host address was mapped by the device MMU. + */ + __u64 cb_handle; + + /* Relevant only when HL_CS_FLAGS_WAIT is set. + * This holds address of array of u64 values that contain + * signal CS sequence numbers. The wait described by this job + * will listen on all those signals (wait event per signal) + */ + __u64 signal_seq_arr; + }; + /* Index of queue to put the CB on */ __u32 queue_index; - /* - * Size of command buffer with valid packets - * Can be smaller then actual CB size - */ - __u32 cb_size; + + union { + /* + * Size of command buffer with valid packets + * Can be smaller then actual CB size + */ + __u32 cb_size; + + /* Relevant only when HL_CS_FLAGS_WAIT is set. + * Number of entries in signal_seq_arr + */ + __u32 num_signal_seq_arr; + }; + /* HL_CS_CHUNK_FLAGS_* */ __u32 cs_chunk_flags; + /* Align structure to 64 bytes */ __u32 pad[11]; }; +/* SIGNAL and WAIT flags are mutually exclusive */ #define HL_CS_FLAGS_FORCE_RESTORE 0x1 +#define HL_CS_FLAGS_SIGNAL 0x2 +#define HL_CS_FLAGS_WAIT 0x4 #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 struct hl_cs_in { + /* this holds address of array of hl_cs_chunk for restore phase */ __u64 chunks_restore; - /* this holds address of array of hl_cs_chunk for execution phase */ + + /* holds address of array of hl_cs_chunk for execution phase */ __u64 chunks_execute; + /* this holds address of array of hl_cs_chunk for store phase - * Currently not in use */ __u64 chunks_store; + /* Number of chunks in restore phase array. Maximum number is * HL_MAX_JOBS_PER_CS */ __u32 num_chunks_restore; + /* Number of chunks in execution array. Maximum number is * HL_MAX_JOBS_PER_CS */ __u32 num_chunks_execute; + /* Number of chunks in restore phase array - Currently not in use */ __u32 num_chunks_store; + /* HL_CS_FLAGS_* */ __u32 cs_flags; + /* Context ID - Currently not in use */ __u32 ctx_id; }; @@ -597,8 +632,8 @@ struct hl_debug_args { * For jobs on external queues, the user needs to create command buffers * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on * internal queues, the user needs to prepare a "command buffer" with packets - * on either the SRAM or DRAM, and give the device address of that buffer to - * the CS ioctl. + * on either the device SRAM/DRAM or the host, and give the device address of + * that buffer to the CS ioctl. * * This IOCTL is asynchronous in regard to the actual execution of the CS. This * means it returns immediately after ALL the JOBS were enqueued on their @@ -610,7 +645,7 @@ struct hl_debug_args { * external JOBS have been completed. Note that if the CS has internal JOBS * which can execute AFTER the external JOBS have finished, the driver might * report that the CS has finished executing BEFORE the internal JOBS have - * actually finish executing. + * actually finished executing. * * Even though the sequence number increments per CS, the user can NOT * automatically assume that if CS with sequence number N finished, then CS -- cgit v1.2.3 From fca72fbb661f95bed34aff2b9eb8806acab4643e Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Sun, 3 May 2020 17:35:54 +0300 Subject: habanalabs: get card type, location from F/W For Gaudi the driver gets two new additional properties from the F/W: 1. The card's type - PCI or PMC 2. The card's location in the Gaudi's box (relevant only for PMC). The card's location is also passed to the user in the HW IP info structure as it needs this property for establishing communication between Gaudis. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/habanalabs_ioctl.c | 2 ++ drivers/misc/habanalabs/include/armcp_if.h | 22 +++++++++++++++++++--- include/uapi/misc/habanalabs.h | 3 ++- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index f5993698d315..52eedd3a6c3a 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -71,6 +71,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); + hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location); + hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index 9e3bc21f20a0..a34fc39ad87e 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2020 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -35,7 +35,8 @@ struct hl_eq_entry { enum pq_init_status { PQ_INIT_STATUS_NA = 0, PQ_INIT_STATUS_READY_FOR_CP, - PQ_INIT_STATUS_READY_FOR_HOST + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI }; /* @@ -350,11 +351,24 @@ struct armcp_sensor { __le32 flags; }; +/** + * struct armcp_card_types - ASIC card type. + * @armcp_card_type_pci: PCI card. + * @armcp_card_type_pmc: PCI Mezzanine Card. + */ +enum armcp_card_types { + armcp_card_type_pci, + armcp_card_type_pmc +}; + /** * struct armcp_info - Info from ArmCP that is necessary to the host's driver * @sensors: available sensors description. * @kernel_version: ArmCP linux kernel version. * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) * @cpld_version: CPLD programmed F/W version. * @infineon_version: Infineon main DC-DC version. * @fuse_version: silicon production FUSE information. @@ -366,7 +380,9 @@ struct armcp_sensor { struct armcp_info { struct armcp_sensor sensors[ARMCP_MAX_SENSORS]; __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved[3]; + __le32 reserved; + __le32 card_type; + __le32 card_location; __le32 cpld_version; __le32 infineon_version; __u8 fuse_version[VERSION_MAX_LEN]; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 70dfccea7038..079613dd7aae 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -125,7 +125,8 @@ struct hl_info_hw_ip_info { __u32 sram_size; __u32 num_of_events; __u32 device_id; /* PCI Device ID */ - __u32 reserved[3]; + __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */ + __u32 reserved[2]; __u32 armcp_cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; -- cgit v1.2.3 From 466c7822b054ffe5bb425c8f98d08676501836e8 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 11 May 2020 10:32:10 +0300 Subject: uapi: habanalabs: add gaudi defines Add the new defines for GAUDI uapi interface. It includes the queue IDs, the engine IDs, SRAM reserved space and Sync Manager reserved resources. There is no new IOCTL or additional operations in existing IOCTLs. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 164 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 079613dd7aae..f6267a8d7416 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -15,10 +15,13 @@ * Defines that are asic-specific but constitutes as ABI between kernel driver * and userspace */ -#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ +#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ +#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ +#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 48 +#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 24 /* - * Queue Numbering + * Goya queue Numbering * * The external queues (PCI DMA channels) MUST be before the internal queues * and each group (PCI DMA channels and internal) must be contiguous inside @@ -45,6 +48,129 @@ enum goya_queue_id { GOYA_QUEUE_ID_SIZE }; +/* + * Gaudi queue Numbering + * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*. + * Except one CPU queue, all the rest are internal queues. + */ + +enum gaudi_queue_id { + GAUDI_QUEUE_ID_DMA_0_0 = 0, /* external */ + GAUDI_QUEUE_ID_DMA_0_1 = 1, /* external */ + GAUDI_QUEUE_ID_DMA_0_2 = 2, /* external */ + GAUDI_QUEUE_ID_DMA_0_3 = 3, /* external */ + GAUDI_QUEUE_ID_DMA_1_0 = 4, /* external */ + GAUDI_QUEUE_ID_DMA_1_1 = 5, /* external */ + GAUDI_QUEUE_ID_DMA_1_2 = 6, /* external */ + GAUDI_QUEUE_ID_DMA_1_3 = 7, /* external */ + GAUDI_QUEUE_ID_CPU_PQ = 8, /* CPU */ + GAUDI_QUEUE_ID_DMA_2_0 = 9, /* internal */ + GAUDI_QUEUE_ID_DMA_2_1 = 10, /* internal */ + GAUDI_QUEUE_ID_DMA_2_2 = 11, /* internal */ + GAUDI_QUEUE_ID_DMA_2_3 = 12, /* internal */ + GAUDI_QUEUE_ID_DMA_3_0 = 13, /* internal */ + GAUDI_QUEUE_ID_DMA_3_1 = 14, /* internal */ + GAUDI_QUEUE_ID_DMA_3_2 = 15, /* internal */ + GAUDI_QUEUE_ID_DMA_3_3 = 16, /* internal */ + GAUDI_QUEUE_ID_DMA_4_0 = 17, /* internal */ + GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ + GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ + GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ + GAUDI_QUEUE_ID_DMA_5_0 = 21, /* external */ + GAUDI_QUEUE_ID_DMA_5_1 = 22, /* external */ + GAUDI_QUEUE_ID_DMA_5_2 = 23, /* external */ + GAUDI_QUEUE_ID_DMA_5_3 = 24, /* external */ + GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ + GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ + GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ + GAUDI_QUEUE_ID_DMA_6_3 = 28, /* internal */ + GAUDI_QUEUE_ID_DMA_7_0 = 29, /* internal */ + GAUDI_QUEUE_ID_DMA_7_1 = 30, /* internal */ + GAUDI_QUEUE_ID_DMA_7_2 = 31, /* internal */ + GAUDI_QUEUE_ID_DMA_7_3 = 32, /* internal */ + GAUDI_QUEUE_ID_MME_0_0 = 33, /* internal */ + GAUDI_QUEUE_ID_MME_0_1 = 34, /* internal */ + GAUDI_QUEUE_ID_MME_0_2 = 35, /* internal */ + GAUDI_QUEUE_ID_MME_0_3 = 36, /* internal */ + GAUDI_QUEUE_ID_MME_1_0 = 37, /* internal */ + GAUDI_QUEUE_ID_MME_1_1 = 38, /* internal */ + GAUDI_QUEUE_ID_MME_1_2 = 39, /* internal */ + GAUDI_QUEUE_ID_MME_1_3 = 40, /* internal */ + GAUDI_QUEUE_ID_TPC_0_0 = 41, /* internal */ + GAUDI_QUEUE_ID_TPC_0_1 = 42, /* internal */ + GAUDI_QUEUE_ID_TPC_0_2 = 43, /* internal */ + GAUDI_QUEUE_ID_TPC_0_3 = 44, /* internal */ + GAUDI_QUEUE_ID_TPC_1_0 = 45, /* internal */ + GAUDI_QUEUE_ID_TPC_1_1 = 46, /* internal */ + GAUDI_QUEUE_ID_TPC_1_2 = 47, /* internal */ + GAUDI_QUEUE_ID_TPC_1_3 = 48, /* internal */ + GAUDI_QUEUE_ID_TPC_2_0 = 49, /* internal */ + GAUDI_QUEUE_ID_TPC_2_1 = 50, /* internal */ + GAUDI_QUEUE_ID_TPC_2_2 = 51, /* internal */ + GAUDI_QUEUE_ID_TPC_2_3 = 52, /* internal */ + GAUDI_QUEUE_ID_TPC_3_0 = 53, /* internal */ + GAUDI_QUEUE_ID_TPC_3_1 = 54, /* internal */ + GAUDI_QUEUE_ID_TPC_3_2 = 55, /* internal */ + GAUDI_QUEUE_ID_TPC_3_3 = 56, /* internal */ + GAUDI_QUEUE_ID_TPC_4_0 = 57, /* internal */ + GAUDI_QUEUE_ID_TPC_4_1 = 58, /* internal */ + GAUDI_QUEUE_ID_TPC_4_2 = 59, /* internal */ + GAUDI_QUEUE_ID_TPC_4_3 = 60, /* internal */ + GAUDI_QUEUE_ID_TPC_5_0 = 61, /* internal */ + GAUDI_QUEUE_ID_TPC_5_1 = 62, /* internal */ + GAUDI_QUEUE_ID_TPC_5_2 = 63, /* internal */ + GAUDI_QUEUE_ID_TPC_5_3 = 64, /* internal */ + GAUDI_QUEUE_ID_TPC_6_0 = 65, /* internal */ + GAUDI_QUEUE_ID_TPC_6_1 = 66, /* internal */ + GAUDI_QUEUE_ID_TPC_6_2 = 67, /* internal */ + GAUDI_QUEUE_ID_TPC_6_3 = 68, /* internal */ + GAUDI_QUEUE_ID_TPC_7_0 = 69, /* internal */ + GAUDI_QUEUE_ID_TPC_7_1 = 70, /* internal */ + GAUDI_QUEUE_ID_TPC_7_2 = 71, /* internal */ + GAUDI_QUEUE_ID_TPC_7_3 = 72, /* internal */ + GAUDI_QUEUE_ID_NIC_0_0 = 73, /* internal */ + GAUDI_QUEUE_ID_NIC_0_1 = 74, /* internal */ + GAUDI_QUEUE_ID_NIC_0_2 = 75, /* internal */ + GAUDI_QUEUE_ID_NIC_0_3 = 76, /* internal */ + GAUDI_QUEUE_ID_NIC_1_0 = 77, /* internal */ + GAUDI_QUEUE_ID_NIC_1_1 = 78, /* internal */ + GAUDI_QUEUE_ID_NIC_1_2 = 79, /* internal */ + GAUDI_QUEUE_ID_NIC_1_3 = 80, /* internal */ + GAUDI_QUEUE_ID_NIC_2_0 = 81, /* internal */ + GAUDI_QUEUE_ID_NIC_2_1 = 82, /* internal */ + GAUDI_QUEUE_ID_NIC_2_2 = 83, /* internal */ + GAUDI_QUEUE_ID_NIC_2_3 = 84, /* internal */ + GAUDI_QUEUE_ID_NIC_3_0 = 85, /* internal */ + GAUDI_QUEUE_ID_NIC_3_1 = 86, /* internal */ + GAUDI_QUEUE_ID_NIC_3_2 = 87, /* internal */ + GAUDI_QUEUE_ID_NIC_3_3 = 88, /* internal */ + GAUDI_QUEUE_ID_NIC_4_0 = 89, /* internal */ + GAUDI_QUEUE_ID_NIC_4_1 = 90, /* internal */ + GAUDI_QUEUE_ID_NIC_4_2 = 91, /* internal */ + GAUDI_QUEUE_ID_NIC_4_3 = 92, /* internal */ + GAUDI_QUEUE_ID_NIC_5_0 = 93, /* internal */ + GAUDI_QUEUE_ID_NIC_5_1 = 94, /* internal */ + GAUDI_QUEUE_ID_NIC_5_2 = 95, /* internal */ + GAUDI_QUEUE_ID_NIC_5_3 = 96, /* internal */ + GAUDI_QUEUE_ID_NIC_6_0 = 97, /* internal */ + GAUDI_QUEUE_ID_NIC_6_1 = 98, /* internal */ + GAUDI_QUEUE_ID_NIC_6_2 = 99, /* internal */ + GAUDI_QUEUE_ID_NIC_6_3 = 100, /* internal */ + GAUDI_QUEUE_ID_NIC_7_0 = 101, /* internal */ + GAUDI_QUEUE_ID_NIC_7_1 = 102, /* internal */ + GAUDI_QUEUE_ID_NIC_7_2 = 103, /* internal */ + GAUDI_QUEUE_ID_NIC_7_3 = 104, /* internal */ + GAUDI_QUEUE_ID_NIC_8_0 = 105, /* internal */ + GAUDI_QUEUE_ID_NIC_8_1 = 106, /* internal */ + GAUDI_QUEUE_ID_NIC_8_2 = 107, /* internal */ + GAUDI_QUEUE_ID_NIC_8_3 = 108, /* internal */ + GAUDI_QUEUE_ID_NIC_9_0 = 109, /* internal */ + GAUDI_QUEUE_ID_NIC_9_1 = 110, /* internal */ + GAUDI_QUEUE_ID_NIC_9_2 = 111, /* internal */ + GAUDI_QUEUE_ID_NIC_9_3 = 112, /* internal */ + GAUDI_QUEUE_ID_SIZE +}; + /* * Engine Numbering * @@ -69,6 +195,40 @@ enum goya_engine_id { GOYA_ENGINE_ID_SIZE }; +enum gaudi_engine_id { + GAUDI_ENGINE_ID_DMA_0 = 0, + GAUDI_ENGINE_ID_DMA_1, + GAUDI_ENGINE_ID_DMA_2, + GAUDI_ENGINE_ID_DMA_3, + GAUDI_ENGINE_ID_DMA_4, + GAUDI_ENGINE_ID_DMA_5, + GAUDI_ENGINE_ID_DMA_6, + GAUDI_ENGINE_ID_DMA_7, + GAUDI_ENGINE_ID_MME_0, + GAUDI_ENGINE_ID_MME_1, + GAUDI_ENGINE_ID_MME_2, + GAUDI_ENGINE_ID_MME_3, + GAUDI_ENGINE_ID_TPC_0, + GAUDI_ENGINE_ID_TPC_1, + GAUDI_ENGINE_ID_TPC_2, + GAUDI_ENGINE_ID_TPC_3, + GAUDI_ENGINE_ID_TPC_4, + GAUDI_ENGINE_ID_TPC_5, + GAUDI_ENGINE_ID_TPC_6, + GAUDI_ENGINE_ID_TPC_7, + GAUDI_ENGINE_ID_NIC_0, + GAUDI_ENGINE_ID_NIC_1, + GAUDI_ENGINE_ID_NIC_2, + GAUDI_ENGINE_ID_NIC_3, + GAUDI_ENGINE_ID_NIC_4, + GAUDI_ENGINE_ID_NIC_5, + GAUDI_ENGINE_ID_NIC_6, + GAUDI_ENGINE_ID_NIC_7, + GAUDI_ENGINE_ID_NIC_8, + GAUDI_ENGINE_ID_NIC_9, + GAUDI_ENGINE_ID_SIZE +}; + enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, -- cgit v1.2.3 From 3234ac664a870e6ea69ae3a57d824cd7edbeacc5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 May 2020 14:06:17 -0700 Subject: /dev/mem: Revoke mappings when a driver claims the region Close the hole of holding a mapping over kernel driver takeover event of a given address range. Commit 90a545e98126 ("restrict /dev/mem to idle io memory ranges") introduced CONFIG_IO_STRICT_DEVMEM with the goal of protecting the kernel against scenarios where a /dev/mem user tramples memory that a kernel driver owns. However, this protection only prevents *new* read(), write() and mmap() requests. Established mappings prior to the driver calling request_mem_region() are left alone. Especially with persistent memory, and the core kernel metadata that is stored there, there are plentiful scenarios for a /dev/mem user to violate the expectations of the driver and cause amplified damage. Teach request_mem_region() to find and shoot down active /dev/mem mappings that it believes it has successfully claimed for the exclusive use of the driver. Effectively a driver call to request_mem_region() becomes a hole-punch on the /dev/mem device. The typical usage of unmap_mapping_range() is part of truncate_pagecache() to punch a hole in a file, but in this case the implementation is only doing the "first half" of a hole punch. Namely it is just evacuating current established mappings of the "hole", and it relies on the fact that /dev/mem establishes mappings in terms of absolute physical address offsets. Once existing mmap users are invalidated they can attempt to re-establish the mapping, or attempt to continue issuing read(2) / write(2) to the invalidated extent, but they will then be subject to the CONFIG_IO_STRICT_DEVMEM checking that can block those subsequent accesses. Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Kees Cook Cc: Matthew Wilcox Cc: Russell King Cc: Andrew Morton Cc: Greg Kroah-Hartman Fixes: 90a545e98126 ("restrict /dev/mem to idle io memory ranges") Signed-off-by: Dan Williams Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/159009507306.847224.8502634072429766747.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/mem.c | 101 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/ioport.h | 6 +++ include/uapi/linux/magic.h | 1 + kernel/resource.c | 5 +++ 4 files changed, 111 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 43dd0891ca1e..31cae88a730b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -31,11 +31,15 @@ #include #include #include +#include +#include +#include #ifdef CONFIG_IA64 # include #endif +#define DEVMEM_MINOR 1 #define DEVPORT_MINOR 4 static inline unsigned long size_inside_page(unsigned long start, @@ -805,12 +809,64 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig) return ret; } +static struct inode *devmem_inode; + +#ifdef CONFIG_IO_STRICT_DEVMEM +void revoke_devmem(struct resource *res) +{ + struct inode *inode = READ_ONCE(devmem_inode); + + /* + * Check that the initialization has completed. Losing the race + * is ok because it means drivers are claiming resources before + * the fs_initcall level of init and prevent /dev/mem from + * establishing mappings. + */ + if (!inode) + return; + + /* + * The expectation is that the driver has successfully marked + * the resource busy by this point, so devmem_is_allowed() + * should start returning false, however for performance this + * does not iterate the entire resource range. + */ + if (devmem_is_allowed(PHYS_PFN(res->start)) && + devmem_is_allowed(PHYS_PFN(res->end))) { + /* + * *cringe* iomem=relaxed says "go ahead, what's the + * worst that can happen?" + */ + return; + } + + unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1); +} +#endif + static int open_port(struct inode *inode, struct file *filp) { + int rc; + if (!capable(CAP_SYS_RAWIO)) return -EPERM; - return security_locked_down(LOCKDOWN_DEV_MEM); + rc = security_locked_down(LOCKDOWN_DEV_MEM); + if (rc) + return rc; + + if (iminor(inode) != DEVMEM_MINOR) + return 0; + + /* + * Use a unified address space to have a single point to manage + * revocations when drivers want to take over a /dev/mem mapped + * range. + */ + inode->i_mapping = devmem_inode->i_mapping; + filp->f_mapping = inode->i_mapping; + + return 0; } #define zero_lseek null_lseek @@ -885,7 +941,7 @@ static const struct memdev { fmode_t fmode; } devlist[] = { #ifdef CONFIG_DEVMEM - [1] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET }, + [DEVMEM_MINOR] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET }, #endif #ifdef CONFIG_DEVKMEM [2] = { "kmem", 0, &kmem_fops, FMODE_UNSIGNED_OFFSET }, @@ -939,6 +995,45 @@ static char *mem_devnode(struct device *dev, umode_t *mode) static struct class *mem_class; +static int devmem_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type devmem_fs_type = { + .name = "devmem", + .owner = THIS_MODULE, + .init_fs_context = devmem_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static int devmem_init_inode(void) +{ + static struct vfsmount *devmem_vfs_mount; + static int devmem_fs_cnt; + struct inode *inode; + int rc; + + rc = simple_pin_fs(&devmem_fs_type, &devmem_vfs_mount, &devmem_fs_cnt); + if (rc < 0) { + pr_err("Cannot mount /dev/mem pseudo filesystem: %d\n", rc); + return rc; + } + + inode = alloc_anon_inode(devmem_vfs_mount->mnt_sb); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + pr_err("Cannot allocate inode for /dev/mem: %d\n", rc); + simple_release_fs(&devmem_vfs_mount, &devmem_fs_cnt); + return rc; + } + + /* publish /dev/mem initialized */ + WRITE_ONCE(devmem_inode, inode); + + return 0; +} + static int __init chr_dev_init(void) { int minor; @@ -960,6 +1055,8 @@ static int __init chr_dev_init(void) */ if ((minor == DEVPORT_MINOR) && !arch_has_dev_port()) continue; + if ((minor == DEVMEM_MINOR) && devmem_init_inode() != 0) + continue; device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor), NULL, devlist[minor].name); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index a9b9170b5dd2..6c3eca90cbc4 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -301,5 +301,11 @@ struct resource *devm_request_free_mem_region(struct device *dev, struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); +#ifdef CONFIG_IO_STRICT_DEVMEM +void revoke_devmem(struct resource *res); +#else +static inline void revoke_devmem(struct resource *res) { }; +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index d78064007b17..f3956fc11de6 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -94,6 +94,7 @@ #define BALLOON_KVM_MAGIC 0x13661366 #define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ +#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 diff --git a/kernel/resource.c b/kernel/resource.c index 76036a41143b..841737bbda9e 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1126,6 +1126,7 @@ struct resource * __request_region(struct resource *parent, { DECLARE_WAITQUEUE(wait, current); struct resource *res = alloc_resource(GFP_KERNEL); + struct resource *orig_parent = parent; if (!res) return NULL; @@ -1176,6 +1177,10 @@ struct resource * __request_region(struct resource *parent, break; } write_unlock(&resource_lock); + + if (res && orig_parent == &iomem_resource) + revoke_devmem(res); + return res; } EXPORT_SYMBOL(__request_region); -- cgit v1.2.3