From 6dd0e5cc87b33aee9cfe4d485c4d8b8382701558 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:11 -0700 Subject: cxl/mbox: Move cxl_mem_command construction to helper funcs Sanitizing and constructing a cxl_mem_command from a userspace command is part of the validation process prior to submitting the command to a CXL device. Move this work to helper functions: cxl_to_mem_cmd(), cxl_to_mem_cmd_raw(). This declutters cxl_validate_cmd_from_user() in preparation for adding new validation steps. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/7d9b826f29262e3a484cb4bb7b63872134d60bd7.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 147 +++++++++++++++++++++++++----------------------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index be61a0d8016b..1f2175f0f687 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -207,76 +207,42 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) return true; } -/** - * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. - * @cxlds: The device data for the operation - * @send_cmd: &struct cxl_send_command copied in from userspace. - * @out_cmd: Sanitized and populated &struct cxl_mem_command. - * - * Return: - * * %0 - @out_cmd is ready to send. - * * %-ENOTTY - Invalid command specified. - * * %-EINVAL - Reserved fields or invalid values were used. - * * %-ENOMEM - Input or output buffer wasn't sized properly. - * * %-EPERM - Attempted to use a protected command. - * * %-EBUSY - Kernel has claimed exclusive access to this opcode - * - * The result of this command is a fully validated command in @out_cmd that is - * safe to send to the hardware. - * - * See handle_mailbox_cmd_from_user() - */ -static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_send_command *send_cmd, - struct cxl_mem_command *out_cmd) +static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) { - const struct cxl_command_info *info; - struct cxl_mem_command *c; - - if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) - return -ENOTTY; - - /* - * The user can never specify an input payload larger than what hardware - * supports, but output can be arbitrarily large (simply write out as - * much data as the hardware provides). - */ - if (send_cmd->in.size > cxlds->payload_size) + if (send_cmd->raw.rsvd) return -EINVAL; /* - * Checks are bypassed for raw commands but a WARN/taint will occur - * later in the callchain + * Unlike supported commands, the output size of RAW commands + * gets passed along without further checking, so it must be + * validated here. */ - if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) { - const struct cxl_mem_command temp = { - .info = { - .id = CXL_MEM_COMMAND_ID_RAW, - .flags = 0, - .size_in = send_cmd->in.size, - .size_out = send_cmd->out.size, - }, - .opcode = send_cmd->raw.opcode - }; - - if (send_cmd->raw.rsvd) - return -EINVAL; + if (send_cmd->out.size > cxlds->payload_size) + return -EINVAL; - /* - * Unlike supported commands, the output size of RAW commands - * gets passed along without further checking, so it must be - * validated here. - */ - if (send_cmd->out.size > cxlds->payload_size) - return -EINVAL; + if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) + return -EPERM; - if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) - return -EPERM; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = CXL_MEM_COMMAND_ID_RAW, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = send_cmd->raw.opcode + }; - memcpy(out_cmd, &temp, sizeof(temp)); + return 0; +} - return 0; - } +static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) +{ + struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; + const struct cxl_command_info *info = &c->info; if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK) return -EINVAL; @@ -287,10 +253,6 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, if (send_cmd->in.rsvd || send_cmd->out.rsvd) return -EINVAL; - /* Convert user's command into the internal representation */ - c = &cxl_mem_commands[send_cmd->id]; - info = &c->info; - /* Check that the command is enabled for hardware */ if (!test_bit(info->id, cxlds->enabled_cmds)) return -ENOTTY; @@ -307,15 +269,58 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, if (info->size_out >= 0 && send_cmd->out.size < info->size_out) return -ENOMEM; - memcpy(out_cmd, c, sizeof(*c)); - out_cmd->info.size_in = send_cmd->in.size; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = info->id, + .flags = info->flags, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = c->opcode + }; + + return 0; +} + +/** + * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. + * @cxlds: The device data for the operation + * @send_cmd: &struct cxl_send_command copied in from userspace. + * @out_cmd: Sanitized and populated &struct cxl_mem_command. + * + * Return: + * * %0 - @out_cmd is ready to send. + * * %-ENOTTY - Invalid command specified. + * * %-EINVAL - Reserved fields or invalid values were used. + * * %-ENOMEM - Input or output buffer wasn't sized properly. + * * %-EPERM - Attempted to use a protected command. + * * %-EBUSY - Kernel has claimed exclusive access to this opcode + * + * The result of this command is a fully validated command in @out_cmd that is + * safe to send to the hardware. + * + * See handle_mailbox_cmd_from_user() + */ +static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, + const struct cxl_send_command *send_cmd, + struct cxl_mem_command *out_cmd) +{ + if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) + return -ENOTTY; + /* - * XXX: out_cmd->info.size_out will be controlled by the driver, and the - * specified number of bytes @send_cmd->out.size will be copied back out - * to userspace. + * The user can never specify an input payload larger than what hardware + * supports, but output can be arbitrarily large (simply write out as + * much data as the hardware provides). */ + if (send_cmd->in.size > cxlds->payload_size) + return -EINVAL; - return 0; + /* Sanitize and construct a cxl_mem_command */ + if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) + return cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); + else + return cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); } int cxl_query_cmd(struct cxl_memdev *cxlmd, -- cgit v1.2.3 From 39ed8da4f341c8dbe9c15f716d3a328269a07fc7 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:12 -0700 Subject: cxl/mbox: Move raw command warning to raw command validation This move serves two purposes: 1) Emit the warning in the raw command validation path, and 2) Remove the dependency on the struct cxl_mem_command in handle_mailbox_cmd_from_user() in preparation for a refactor of that function. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/df5f0e0ec8afa1f75299aa86b4226ab4479ef325.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 1f2175f0f687..39c8f6ded175 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -225,6 +225,8 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) return -EPERM; + dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n"); + *mem_cmd = (struct cxl_mem_command) { .info = { .id = CXL_MEM_COMMAND_ID_RAW, @@ -417,9 +419,6 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, cxl_command_names[cmd->info.id].name, mbox_cmd.opcode, cmd->info.size_in); - dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW, - "raw command path used\n"); - rc = cxlds->mbox_send(cxlds, &mbox_cmd); if (rc) goto out; -- cgit v1.2.3 From 63cf60b7e0a556b3542e6e915dfe9c93eaa559fd Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:13 -0700 Subject: cxl/mbox: Move build of user mailbox cmd to a helper functions In preparation for moving the construction of a mailbox command to the validation path, extract the work into a helper functions. Signed-off-by: Alison Schofield Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/493d7618a846d787c3ae28778935ca35e2b85eed.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 39c8f6ded175..99b933fb2d9b 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -207,6 +207,44 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) return true; } +static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, + struct cxl_dev_state *cxlds, u16 opcode, + size_t in_size, size_t out_size, u64 in_payload) +{ + *mbox = (struct cxl_mbox_cmd) { + .opcode = opcode, + .size_in = in_size, + }; + + if (in_size) { + mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), + in_size); + if (!mbox->payload_in) + return PTR_ERR(mbox->payload_in); + } + + /* Prepare to handle a full payload for variable sized output */ + if (out_size < 0) + mbox->size_out = cxlds->payload_size; + else + mbox->size_out = out_size; + + if (mbox->size_out) { + mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL); + if (!mbox->payload_out) { + kvfree(mbox->payload_in); + return -ENOMEM; + } + } + return 0; +} + +static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox) +{ + kvfree(mbox->payload_in); + kvfree(mbox->payload_out); +} + static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, struct cxl_dev_state *cxlds) @@ -390,27 +428,14 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, s32 *size_out, u32 *retval) { struct device *dev = cxlds->dev; - struct cxl_mbox_cmd mbox_cmd = { - .opcode = cmd->opcode, - .size_in = cmd->info.size_in, - .size_out = cmd->info.size_out, - }; + struct cxl_mbox_cmd mbox_cmd; int rc; - if (cmd->info.size_out) { - mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL); - if (!mbox_cmd.payload_out) - return -ENOMEM; - } - - if (cmd->info.size_in) { - mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload), - cmd->info.size_in); - if (IS_ERR(mbox_cmd.payload_in)) { - kvfree(mbox_cmd.payload_out); - return PTR_ERR(mbox_cmd.payload_in); - } - } + rc = cxl_mbox_cmd_ctor(&mbox_cmd, cxlds, cmd->opcode, + cmd->info.size_in, cmd->info.size_out, + in_payload); + if (rc) + return rc; dev_dbg(dev, "Submitting %s command for user\n" @@ -442,8 +467,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, *retval = mbox_cmd.return_code; out: - kvfree(mbox_cmd.payload_in); - kvfree(mbox_cmd.payload_out); + cxl_mbox_cmd_dtor(&mbox_cmd); return rc; } @@ -464,10 +488,6 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (rc) return rc; - /* Prepare to handle a full payload for variable sized output */ - if (c.info.size_out < 0) - c.info.size_out = cxlds->payload_size; - rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload, send.out.payload, &send.out.size, &send.retval); -- cgit v1.2.3 From 9ae016aeb722504703c67e6e59c673719868f467 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:14 -0700 Subject: cxl/mbox: Construct a users cxl_mbox_cmd in the validation path This is a step in refactoring the handling of user space mailbox commands. The intent is to have all the validation work originate in cxl_validate_cmd_from_user(). Move the construction and validation of a mailbox command to the validation path. Continue to pass both the out_cmd and the mbox_cmd until handle_mbox_cmd_from_user() learns how to use a mbox_cmd param. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/c9fbdad968a2b619f9108bb6c37cef1a853cdf5a.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 99b933fb2d9b..47f39c6131c4 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -324,6 +324,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, /** * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. + * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. * @cxlds: The device data for the operation * @send_cmd: &struct cxl_send_command copied in from userspace. * @out_cmd: Sanitized and populated &struct cxl_mem_command. @@ -341,10 +342,13 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * * See handle_mailbox_cmd_from_user() */ -static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, +static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, + struct cxl_dev_state *cxlds, const struct cxl_send_command *send_cmd, struct cxl_mem_command *out_cmd) { + int rc; + if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) return -ENOTTY; @@ -358,9 +362,17 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - return cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); else - return cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); + + if (rc) + return rc; + + /* Sanitize and construct a cxl_mbox_cmd */ + return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, out_cmd->opcode, + out_cmd->info.size_in, out_cmd->info.size_out, + send_cmd->in.payload); } int cxl_query_cmd(struct cxl_memdev *cxlmd, @@ -477,6 +489,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) struct device *dev = &cxlmd->dev; struct cxl_send_command send; struct cxl_mem_command c; + struct cxl_mbox_cmd mbox_cmd; int rc; dev_dbg(dev, "Send IOCTL\n"); @@ -484,7 +497,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(cxlmd->cxlds, &send, &c); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send, &c); if (rc) return rc; -- cgit v1.2.3 From 82b8ba29538e5dae0a4e481dbbea4d5015c683af Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:15 -0700 Subject: cxl/mbox: Remove dependency on cxl_mem_command for a debug msg In preparation for removing access to struct cxl_mem_command, change this debug message to use cxl_mbox_cmd fields instead. Retrieve the pretty command name from cxl_mbox_cmd using a new opcode to command name helper. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/57265751d336a6e95f5ca31a9c77189408b05742.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 47f39c6131c4..8c59c6959c15 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -127,6 +127,17 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) return NULL; } +static const char *cxl_mem_opcode_to_name(u16 opcode) +{ + struct cxl_mem_command *c; + + c = cxl_mem_find_command(opcode); + if (!c) + return NULL; + + return cxl_command_names[c->info.id].name; +} + /** * cxl_mbox_send_cmd() - Send a mailbox command to a device. * @cxlds: The device data for the operation @@ -452,9 +463,9 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, dev_dbg(dev, "Submitting %s command for user\n" "\topcode: %x\n" - "\tsize: %ub\n", - cxl_command_names[cmd->info.id].name, mbox_cmd.opcode, - cmd->info.size_in); + "\tsize: %zx\n", + cxl_mem_opcode_to_name(mbox_cmd.opcode), + mbox_cmd.opcode, mbox_cmd.size_in); rc = cxlds->mbox_send(cxlds, &mbox_cmd); if (rc) -- cgit v1.2.3 From d97fe8eec2b895b2aa2f7bb3d55e7b88bc2d53f3 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:16 -0700 Subject: cxl/mbox: Make handle_mailbox_cmd_from_user() use a mbox param Previously, handle_mailbox_cmd_from_user(), constructed the mailbox command and dispatched it to the hardware. The construction work has moved to the validation path. handle_mailbox_cmd_from_user() now expects a fully validated mbox param. Make it's caller, cxl_send_cmd(), deliver it. Update the comments and dereferencing of the new mbox parameter. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/77050ba512d6c30eccf7505467509e460dd325a0.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 8c59c6959c15..b4bef3c50ee3 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -423,8 +423,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. * @cxlds: The device data for the operation - * @cmd: The validated command. - * @in_payload: Pointer to userspace's input payload. + * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. * (Output) Payload size hardware generated. @@ -439,35 +438,27 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * %-EINTR - Mailbox acquisition interrupted. * * %-EXXX - Transaction level failures. * - * Creates the appropriate mailbox command and dispatches it on behalf of a - * userspace request. The input and output payloads are copied between - * userspace. + * Dispatches a mailbox command on behalf of a userspace request. + * The output payload is copied to userspace. * * See cxl_send_cmd(). */ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_mem_command *cmd, - u64 in_payload, u64 out_payload, - s32 *size_out, u32 *retval) + struct cxl_mbox_cmd *mbox_cmd, + u64 out_payload, s32 *size_out, + u32 *retval) { struct device *dev = cxlds->dev; - struct cxl_mbox_cmd mbox_cmd; int rc; - rc = cxl_mbox_cmd_ctor(&mbox_cmd, cxlds, cmd->opcode, - cmd->info.size_in, cmd->info.size_out, - in_payload); - if (rc) - return rc; - dev_dbg(dev, "Submitting %s command for user\n" "\topcode: %x\n" "\tsize: %zx\n", - cxl_mem_opcode_to_name(mbox_cmd.opcode), - mbox_cmd.opcode, mbox_cmd.size_in); + cxl_mem_opcode_to_name(mbox_cmd->opcode), + mbox_cmd->opcode, mbox_cmd->size_in); - rc = cxlds->mbox_send(cxlds, &mbox_cmd); + rc = cxlds->mbox_send(cxlds, mbox_cmd); if (rc) goto out; @@ -476,21 +467,21 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, * to userspace. While the payload may have written more output than * this it will have to be ignored. */ - if (mbox_cmd.size_out) { - dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out, + if (mbox_cmd->size_out) { + dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out, "Invalid return size\n"); if (copy_to_user(u64_to_user_ptr(out_payload), - mbox_cmd.payload_out, mbox_cmd.size_out)) { + mbox_cmd->payload_out, mbox_cmd->size_out)) { rc = -EFAULT; goto out; } } - *size_out = mbox_cmd.size_out; - *retval = mbox_cmd.return_code; + *size_out = mbox_cmd->size_out; + *retval = mbox_cmd->return_code; out: - cxl_mbox_cmd_dtor(&mbox_cmd); + cxl_mbox_cmd_dtor(mbox_cmd); return rc; } @@ -512,9 +503,8 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (rc) return rc; - rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload, - send.out.payload, &send.out.size, - &send.retval); + rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload, + &send.out.size, &send.retval); if (rc) return rc; -- cgit v1.2.3 From 2dd5600a0e4e1aca8541e1c5bb2bc26bc0441d4d Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:17 -0700 Subject: cxl/mbox: Move cxl_mem_command param to a local variable cxl_validate_command_from_user() is now the single point of validation for mailbox commands coming from user space. Previously, it returned a a cxl_mem_command, but that was not sufficient when validation of the actual mailbox command became a requirement. Now, it returns a fully validated cxl_mbox_cmd. Remove the extraneous cxl_mem_command parameter. Define and use a local version only. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/c11a437896d914daf36f5ac8ec62f999c5ec2da7.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index b4bef3c50ee3..704d6b313144 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -338,7 +338,6 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. * @cxlds: The device data for the operation * @send_cmd: &struct cxl_send_command copied in from userspace. - * @out_cmd: Sanitized and populated &struct cxl_mem_command. * * Return: * * %0 - @out_cmd is ready to send. @@ -348,16 +347,14 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * * %-EPERM - Attempted to use a protected command. * * %-EBUSY - Kernel has claimed exclusive access to this opcode * - * The result of this command is a fully validated command in @out_cmd that is + * The result of this command is a fully validated command in @mbox_cmd that is * safe to send to the hardware. - * - * See handle_mailbox_cmd_from_user() */ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, struct cxl_dev_state *cxlds, - const struct cxl_send_command *send_cmd, - struct cxl_mem_command *out_cmd) + const struct cxl_send_command *send_cmd) { + struct cxl_mem_command mem_cmd; int rc; if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) @@ -373,16 +370,16 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - rc = cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds); else - rc = cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds); if (rc) return rc; /* Sanitize and construct a cxl_mbox_cmd */ - return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, out_cmd->opcode, - out_cmd->info.size_in, out_cmd->info.size_out, + return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode, + mem_cmd.info.size_in, mem_cmd.info.size_out, send_cmd->in.payload); } @@ -490,7 +487,6 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxlmd->dev; struct cxl_send_command send; - struct cxl_mem_command c; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -499,7 +495,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send, &c); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send); if (rc) return rc; -- cgit v1.2.3 From 6179045ccc0c6229dc449afc1701dc7fbd40571f Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:18 -0700 Subject: cxl/mbox: Block immediate mode in SET_PARTITION_INFO command User space may send the SET_PARTITION_INFO mailbox command using the IOCTL interface. Inspect the input payload and fail if the immediate flag is set. This is the first instance of the driver inspecting an input payload from user space. Assume there will be more such cases and implement with an extensible helper. In order for the kernel to react to an immediate partition change it needs to assert that the change will not affect any active decode. At a minimum this requires validating that the device is using HDM decoders instead of the CXL DVSEC for decode, and that none of the active HDM decoders are affected by the partition change. For now, just fail until that support arrives. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/241821186c363833980adbc389e2c547bc5a6395.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 41 +++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 7 +++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 704d6b313144..839207b627a8 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -218,6 +218,40 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) return true; } +/** + * cxl_payload_from_user_allowed() - Check contents of in_payload. + * @opcode: The mailbox command opcode. + * @payload_in: Pointer to the input payload passed in from user space. + * + * Return: + * * true - payload_in passes check for @opcode. + * * false - payload_in contains invalid or unsupported values. + * + * The driver may inspect payload contents before sending a mailbox + * command from user space to the device. The intent is to reject + * commands with input payloads that are known to be unsafe. This + * check is not intended to replace the users careful selection of + * mailbox command parameters and makes no guarantee that the user + * command will succeed, nor that it is appropriate. + * + * The specific checks are determined by the opcode. + */ +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) +{ + switch (opcode) { + case CXL_MBOX_OP_SET_PARTITION_INFO: { + struct cxl_mbox_set_partition_info *pi = payload_in; + + if (pi->flags && CXL_SET_PARTITION_IMMEDIATE_FLAG) + return false; + break; + } + default: + break; + } + return true; +} + static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, struct cxl_dev_state *cxlds, u16 opcode, size_t in_size, size_t out_size, u64 in_payload) @@ -232,6 +266,13 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, in_size); if (!mbox->payload_in) return PTR_ERR(mbox->payload_in); + + if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { + dev_dbg(cxlds->dev, "%s: input payload not allowed\n", + cxl_mem_opcode_to_name(opcode)); + kvfree(mbox->payload_in); + return -EBUSY; + } } /* Prepare to handle a full payload for variable sized output */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5d33ce24fe09..df8dad028e43 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -262,6 +262,13 @@ struct cxl_mbox_set_lsa { u8 data[]; } __packed; +struct cxl_mbox_set_partition_info { + __le64 volatile_capacity; + u8 flags; +} __packed; + +#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) + /** * struct cxl_mem_command - Driver representation of a memory device command * @info: Command information as it exists for the UAPI -- cgit v1.2.3 From 6aa657f416b65f23d7a3c9d04f144b1c4aa2ebc1 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:19 -0700 Subject: cxl/pmem: Remove CXL SET_PARTITION_INFO from exclusive_cmds list With SET_PARTITION_INFO on the exclusive_cmds list for the CXL_PMEM driver, userspace cannot execute a set-partition command without first unbinding the pmem driver from the device. When userspace requests a partition change to take effect on the next reboot this unbind requirement is unnecessarily restrictive. The driver does not need to enforce an unbind because partitions will not change until the next reboot. Of course, userspace still needs to be aware that changing the size of persistent capacity on the next reboot will result in the loss of data stored. That can happen regardless of whether it is presently bound at the time of issuing the set-partition command. When userspace requests a partition change to take effect immediately, restrictions are needed. The CXL_MEM driver currently blocks the usage of immediate mode, making the presence of SET_PARTITION_INFO, in this exclusive commands list, redundant. In the future, when the CXL_MEM driver adds support for immediate changes to device partitions it will ensure that the partition change will not affect any active decode. That means the work will not fall right back here, onto the CXL_PMEM driver. Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/accc6abc878f0662093b81490a1a052f2ff6f06e.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 15ad666ab03e..404018dd99e6 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -344,7 +344,6 @@ static __init int cxl_pmem_init(void) { int rc; - set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds); -- cgit v1.2.3 From ee92c7e261fd4b58ed5991a776ae9b25e9a5e030 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:13 -0700 Subject: cxl/mbox: Drop mbox_mutex comment ... we have lockdep for this. Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-2-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 839207b627a8..187590c9844c 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -147,7 +147,7 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) * @out: Caller allocated buffer for the output. * @out_size: Expected size of output. * - * Context: Any context. Will acquire and release mbox_mutex. + * Context: Any context. * Return: * * %>=0 - Number of bytes returned in @out. * * %-E2BIG - Payload is too large for hardware. -- cgit v1.2.3 From cbe83a2052682c6f57d45f76fe7fea4bf254acd9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:14 -0700 Subject: cxl/pci: Use CXL_MBOX_SUCCESS to check against mbox_cmd return code Also mention the need for the caller to check against any errors from the hardware in return_code. Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-3-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 3f2182d66829..94a91048e2f6 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -177,9 +177,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); - if (mbox_cmd->return_code != 0) { + if (mbox_cmd->return_code != CXL_MBOX_SUCCESS) { dev_dbg(dev, "Mailbox operation had an error\n"); - return 0; + return 0; /* completed but caller must check return_code */ } /* #7 */ -- cgit v1.2.3 From 92fcc1abab095dceb2337444f79875c8a85063df Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:15 -0700 Subject: cxl/mbox: Improve handling of mbox_cmd hw return codes Upon a completed command the caller is still expected to check the actual return_code register to ensure it succeed. This adds, per the spec, the potential command return codes. It maps the hardware return code with the kernel's errno style, and by default continues to use -ENXIO (Command completed, but device reported an error). Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-4-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- drivers/cxl/cxlmem.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++- drivers/cxl/pci.c | 2 +- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 187590c9844c..5adc0ddb1737 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -181,7 +181,7 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, return rc; /* TODO: Map return code to proper kernel style errno */ - if (mbox_cmd.return_code != CXL_MBOX_SUCCESS) + if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) return -ENXIO; /* diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index df8dad028e43..243dd86a8b46 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -85,9 +85,60 @@ struct cxl_mbox_cmd { size_t size_in; size_t size_out; u16 return_code; -#define CXL_MBOX_SUCCESS 0 }; +/* + * Per CXL 2.0 Section 8.2.8.4.5.1 + */ +#define CMD_CMD_RC_TABLE \ + C(SUCCESS, 0, NULL), \ + C(BACKGROUND, -ENXIO, "background cmd started successfully"), \ + C(INPUT, -ENXIO, "cmd input was invalid"), \ + C(UNSUPPORTED, -ENXIO, "cmd is not supported"), \ + C(INTERNAL, -ENXIO, "internal device error"), \ + C(RETRY, -ENXIO, "temporary error, retry once"), \ + C(BUSY, -ENXIO, "ongoing background operation"), \ + C(MEDIADISABLED, -ENXIO, "media access is disabled"), \ + C(FWINPROGRESS, -ENXIO, "one FW package can be transferred at a time"), \ + C(FWOOO, -ENXIO, "FW package content was transferred out of order"), \ + C(FWAUTH, -ENXIO, "FW package authentication failed"), \ + C(FWSLOT, -ENXIO, "FW slot is not supported for requested operation"), \ + C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"), \ + C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ + C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ + C(PADDR, -ENXIO, "physical address specified is invalid"), \ + C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ + C(ABORT, -ENXIO, "background cmd was aborted by device"), \ + C(SECURITY, -ENXIO, "not valid in the current security state"), \ + C(PASSPHRASE, -ENXIO, "phrase doesn't match current set passphrase"), \ + C(MBUNSUPPORTED, -ENXIO, "unsupported on the mailbox it was issued on"),\ + C(PAYLOADLEN, -ENXIO, "invalid payload length") + +#undef C +#define C(a, b, c) CXL_MBOX_CMD_RC_##a +enum { CMD_CMD_RC_TABLE }; +#undef C +#define C(a, b, c) { b, c } +struct cxl_mbox_cmd_rc { + int err; + const char *desc; +}; + +static const +struct cxl_mbox_cmd_rc cxl_mbox_cmd_rctable[] ={ CMD_CMD_RC_TABLE }; +#undef C + +static inline const char *cxl_mbox_cmd_rc2str(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].desc; +} + +static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].err; +} + /* * CXL 2.0 - Memory capacity multiplier * See Section 8.2.9.5 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 94a91048e2f6..88fcd6cc38a3 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -177,7 +177,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); - if (mbox_cmd->return_code != CXL_MBOX_SUCCESS) { + if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { dev_dbg(dev, "Mailbox operation had an error\n"); return 0; /* completed but caller must check return_code */ } -- cgit v1.2.3 From c43e036d6f861f4c68b50eea49ab55b539eaab02 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:16 -0700 Subject: cxl/mbox: Use new return_code handling Use the global cxl_mbox_cmd_rc table to improve debug messaging in __cxl_pci_mbox_send_cmd() and allow cxl_mbox_send_cmd() to map to proper kernel style errno codes - this patch continues to use -ENXIO only so no change in semantics. Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-5-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 3 +-- drivers/cxl/pci.c | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 5adc0ddb1737..8a8388599a85 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -180,9 +180,8 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, if (rc) return rc; - /* TODO: Map return code to proper kernel style errno */ if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) - return -ENXIO; + return cxl_mbox_cmd_rc2errno(&mbox_cmd); /* * Variable sized commands can't be validated and so it's up to the diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 88fcd6cc38a3..39694cc63381 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -178,7 +178,8 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { - dev_dbg(dev, "Mailbox operation had an error\n"); + dev_dbg(dev, "Mailbox operation had an error: %s\n", + cxl_mbox_cmd_rc2str(mbox_cmd)); return 0; /* completed but caller must check return_code */ } -- cgit v1.2.3 From e08063fb87944b1db963e94b833608318179708d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:22 -0700 Subject: cxl/mem: Drop DVSEC vs EFI Memory Map sanity check When the driver finds legacy DVSEC ranges active on a CXL Memory Expander it indicates that platform firmware is not aware of, or is deliberately disabling common CXL 2.0 operation. In this case Linux generally has no choice, but to leave the device alone. The driver attempts to validate that the DVSEC range is in the EFI memory map. Remove that logic since there is no requirement that the BIOS publish DVSEC ranges in the EFI Memory Map. In the future the driver will want to permanently reserve this capacity out of the available CFMWS capacity and hide it from request_free_mem_region(), but it serves no purpose to warn about the range not appearing in the EFI Memory Map. Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164730734246.3806189.13995924771963139898.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 49a4b1c47299..cd4e8bba82aa 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -158,30 +158,8 @@ static int cxl_mem_probe(struct device *dev) * is no use in trying to manage those. */ if (!cxl_dvsec_decode_init(cxlds)) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - int i; - - /* */ - for (i = 0; i < 2; i++) { - u64 base, size; - - /* - * Give a nice warning to the user that BIOS has really - * botched things for them if it didn't place DVSEC - * ranges in the memory map. - */ - base = info->dvsec_range[i].start; - size = range_len(&info->dvsec_range[i]); - if (size && !region_intersects(base, size, - IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE)) { - dev_err(dev, - "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", - base, base + size - 1); - } - } dev_err(dev, - "Active DVSEC range registers in use. Will not bind.\n"); + "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; } -- cgit v1.2.3 From e39f9be08d9dfe685c8a325ac1755c04f383effc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:28 -0700 Subject: cxl/pci: Add debug for DVSEC range init failures In preparation for not treating DVSEC range initialization failures as fatal to cxl_pci_probe() add individual dev_dbg() statements for each of the major failure reasons in cxl_dvsec_ranges(). The rationale for cxl_dvsec_ranges() failure not being fatal is that there is still value for cxl_pci to enable mailbox operations even if CXL.mem operation is disabled. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730734812.3806189.2726330688692684104.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 39694cc63381..1cb2557d68b7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -467,12 +467,15 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) { struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; int hdm_count, rc, i; u16 cap, ctrl; - if (!d) + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); return -ENXIO; + } rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); if (rc) @@ -482,8 +485,10 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; + } /* * It is not allowed by spec for MEM.capable to be set and have 0 legacy @@ -496,8 +501,10 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) return -EINVAL; rc = wait_for_valid(cxlds); - if (rc) + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); return rc; + } info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); -- cgit v1.2.3 From fbaf2b079d2a0a9c7114fbd4d1c0f3dd7a3cb3ad Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:33 -0700 Subject: cxl/mem: Make cxl_dvsec_range() init failure fatal In preparation for the cxl_pci driver to continue operation after cxl_dvsec_range() failure, update cxl_mem to check for negative error codes in info->ranges. Treat that condition as fatal regardless of the state of the HDM configuration since cxl_mem needs positive confirmation that legacy ranges were not established by platform firmware or another agent. Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730735324.3806189.4167509857771192422.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index cd4e8bba82aa..50704deb2ff0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -88,6 +88,9 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) void __iomem *crb; u32 global_ctrl; + if (info->ranges < 0) + return false; + /* map hdm decoder */ crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { -- cgit v1.2.3 From 36bfc6ad508af38f212cf5a38147d867fb3f80a8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:38 -0700 Subject: cxl/pci: Make cxl_dvsec_ranges() failure not fatal to cxl_pci cxl_dvsec_ranges(), the helper for enumerating the presence of an active legacy CXL.mem configuration on a CXL 2.0 Memory Expander, is not fatal for cxl_pci because there is still value to enable mailbox operations even if CXL.mem operation is disabled. Recall that the reason cxl_pci does this initialization and not cxl_mem is to preserve the useful property (for unit testing) that cxl_mem is cxl_memdev + mmio generic, and does not require access to a 'struct pci_dev' to issue config cycles. Update 'struct cxl_endpoint_dvsec_info' to carry either a positive number of non-zero size legacy CXL DVSEC ranges, or the negative error code from __cxl_dvsec_ranges() in its @ranges member. Reported-by: Krzysztof Zach Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730735869.3806189.4032428192652531946.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1cb2557d68b7..e7ab9a34d718 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -463,13 +463,18 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds) return 0; } -static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +/* + * Return positive number of non-zero ranges on success and a negative + * error code on failure. The cxl_mem driver depends on ranges == 0 to + * init HDM operation. + */ +static int __cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int hdm_count, rc, i, ranges = 0; struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; - int hdm_count, rc, i; u16 cap, ctrl; if (!d) { @@ -546,10 +551,17 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) }; if (size) - info->ranges++; + ranges++; } - return 0; + return ranges; +} + +static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + + info->ranges = __cxl_dvsec_ranges(cxlds, info); } static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -618,10 +630,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_dvsec_ranges(cxlds); - if (rc) - dev_warn(&pdev->dev, - "Failed to get DVSEC range information (%d)\n", rc); + cxl_dvsec_ranges(cxlds); cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) -- cgit v1.2.3 From 31e624a77e748e4ab7d5c9c3ddc46ba7735bd75e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:44 -0700 Subject: cxl/mem: Rename cxl_dvsec_decode_init() to cxl_hdm_decode_init() cxl_dvsec_decode_init() is tasked with checking whether legacy DVSEC range based decode is in effect, or whether HDM can be enabled / already is enabled. As such it either succeeds or fails and that result is the return value. The @do_hdm_init variable is misleading in the case where HDM operation is already found to be active, so just call it @retval. Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730736435.3806189.2537160791687837469.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 12 ++++++------ tools/testing/cxl/mock_mem.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 50704deb2ff0..3baae1332760 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -68,7 +68,7 @@ static int create_endpoint(struct cxl_memdev *cxlmd, } /** - * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state * * Additionally, enables global HDM decoding. Warning: don't call this outside @@ -79,12 +79,12 @@ static int create_endpoint(struct cxl_memdev *cxlmd, * decoders, or if it can not be determined if DVSEC Ranges are in use. * Otherwise, returns true. */ -__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct cxl_register_map map; struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, do_hdm_init = false; + bool global_enable, retval = false; void __iomem *crb; u32 global_ctrl; @@ -113,7 +113,7 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) goto out; } - do_hdm_init = true; + retval = true; /* * Permanently (for this boot at least) opt the device into HDM @@ -129,7 +129,7 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) out: iounmap(crb); - return do_hdm_init; + return retval; } static int cxl_mem_probe(struct device *dev) @@ -160,7 +160,7 @@ static int cxl_mem_probe(struct device *dev) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ - if (!cxl_dvsec_decode_init(cxlds)) { + if (!cxl_hdm_decode_init(cxlds)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c index d1dec5845139..69946f678cfa 100644 --- a/tools/testing/cxl/mock_mem.c +++ b/tools/testing/cxl/mock_mem.c @@ -4,7 +4,7 @@ #include struct cxl_dev_state; -bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { return true; } -- cgit v1.2.3 From 35ee1f499091c76bd5f5d52f5ef79c3568ac74a6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 8 Apr 2022 12:30:29 -0700 Subject: cxl/mem: Replace redundant debug message with a comment cxl_mem_probe() already emits a log message when HDM operation can not be established. Delete the similar one in cxl_hdm_decode_init(). What is less obvious is why global_ctrl being enabled makes positive values of info->ranges irrelevant, and the Linux behavior with respect to the spec recommendation to mirror CXL Range registers with HDM Decoder Base + Size registers. Cc: Ben Widawsky Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164944616743.454665.7055846627973202403.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 3baae1332760..43e73d259207 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -107,11 +107,17 @@ __mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) global_ctrl = readl(crb + cmap->hdm_decoder.offset + CXL_HDM_DECODER_CTRL_OFFSET); global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; - if (!global_enable && info->ranges) { - dev_dbg(cxlds->dev, - "DVSEC ranges already programmed and HDM decoders not enabled.\n"); + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. + */ + if (!global_enable && info->ranges) goto out; - } retval = true; -- cgit v1.2.3 From 9ea4dcf49878bb9546b8fa9319dcbdc9b7ee20f8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 22 Apr 2022 15:58:11 -0700 Subject: PM: CXL: Disable suspend The CXL specification claims S3 support at a hardware level, but at a system software level there are some missing pieces. Section 9.4 (CXL 2.0) rightly claims that "CXL mem adapters may need aux power to retain memory context across S3", but there is no enumeration mechanism for the OS to determine if a given adapter has that support. Moreover the save state and resume image for the system may inadvertantly end up in a CXL device that needs to be restored before the save state is recoverable. I.e. a circular dependency that is not resolvable without a third party save-area. Arrange for the cxl_mem driver to fail S3 attempts. This still nominaly allows for suspend, but requires unbinding all CXL memory devices before the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind flow is intended to also tear down all CXL memory regions associated with a given cxl_memdev. It is reasonable to assume that any device participating in a System RAM range published in the EFI memory map is covered by aux power and save-area outside the device itself. So this restriction can be minimized in the future once pre-existing region enumeration support arrives, and perhaps a spec update to clarify if the EFI memory map is sufficent for determining the range of devices managed by platform-firmware for S3 support. Per Rafael, if the CXL configuration prevents suspend then it should fail early before tasks are frozen, and mem_sleep should stop showing 'mem' as an option [1]. Effectively CXL augments the platform suspend ->valid() op since, for example, the ACPI ops are not aware of the CXL / PCI dependencies. Given the split role of platform firmware vs OS provisioned CXL memory it is up to the cxl_mem driver to determine if the CXL configuration has elements that platform firmware may not be prepared to restore. Link: https://lore.kernel.org/r/CAJZ5v0hGVN_=3iU8OLpHY3Ak35T5+JcBM-qs8SbojKrpd0VXsA@mail.gmail.com [1] Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Len Brown Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/165066828317.3907920.5690432272182042556.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/Makefile | 2 +- drivers/cxl/Kconfig | 4 ++++ drivers/cxl/Makefile | 2 +- drivers/cxl/core/Makefile | 1 + drivers/cxl/core/suspend.c | 24 ++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 11 +++++++++++ drivers/cxl/mem.c | 22 +++++++++++++++++++++- include/linux/pm.h | 9 +++++++++ kernel/power/hibernate.c | 2 +- kernel/power/main.c | 5 ++++- kernel/power/suspend.c | 3 ++- 11 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 drivers/cxl/core/suspend.c diff --git a/drivers/Makefile b/drivers/Makefile index 020780b6b4d2..f735c4955143 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,9 +72,9 @@ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DAX) += dax/ -obj-$(CONFIG_CXL_BUS) += cxl/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ +obj-y += cxl/ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index b88ab956bb7c..f64e3984689f 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -98,4 +98,8 @@ config CXL_PORT default CXL_BUS tristate +config CXL_SUSPEND + def_bool y + depends on SUSPEND && CXL_MEM + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index ce267ef11d93..a78270794150 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CXL_BUS) += core/ +obj-y += core/ obj-$(CONFIG_CXL_PCI) += cxl_pci.o obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 6d37cd78b151..9d35085d25af 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += cxl_core.o +obj-$(CONFIG_CXL_SUSPEND) += suspend.o ccflags-y += -I$(srctree)/drivers/cxl cxl_core-y := port.o diff --git a/drivers/cxl/core/suspend.c b/drivers/cxl/core/suspend.c new file mode 100644 index 000000000000..a5984d96ea1d --- /dev/null +++ b/drivers/cxl/core/suspend.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include "cxlmem.h" + +static atomic_t mem_active; + +bool cxl_mem_active(void) +{ + return atomic_read(&mem_active) != 0; +} + +void cxl_mem_active_inc(void) +{ + atomic_inc(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_inc, CXL); + +void cxl_mem_active_dec(void) +{ + atomic_dec(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_dec, CXL); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 243dd86a8b46..7235d2f976e5 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -353,6 +353,17 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +#ifdef CONFIG_CXL_SUSPEND +void cxl_mem_active_inc(void); +void cxl_mem_active_dec(void); +#else +static inline void cxl_mem_active_inc(void) +{ +} +static inline void cxl_mem_active_dec(void) +{ +} +#endif struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 43e73d259207..1bd2e0b67f59 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -138,6 +138,11 @@ out: return retval; } +static void enable_suspend(void *data) +{ + cxl_mem_active_dec(); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -194,7 +199,22 @@ static int cxl_mem_probe(struct device *dev) out: cxl_device_unlock(&parent_port->dev); put_device(&parent_port->dev); - return rc; + + /* + * The kernel may be operating out of CXL memory on this device, + * there is no spec defined way to determine whether this device + * preserves contents over suspend, and there is no simple way + * to arrange for the suspend image to avoid CXL memory which + * would setup a circular dependency between PCI resume and save + * state restoration. + * + * TODO: support suspend when all the regions this device is + * hosting are locked and covered by the system address map, + * i.e. platform firmware owns restoring the HDM configuration + * that it locked. + */ + cxl_mem_active_inc(); + return devm_add_action_or_reset(dev, enable_suspend, NULL); } static struct cxl_driver cxl_mem_driver = { diff --git a/include/linux/pm.h b/include/linux/pm.h index e65b3ab28377..7911c4c9a7be 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -36,6 +36,15 @@ static inline void pm_vt_switch_unregister(struct device *dev) } #endif /* CONFIG_VT_CONSOLE_SLEEP */ +#ifdef CONFIG_CXL_SUSPEND +bool cxl_mem_active(void); +#else +static inline bool cxl_mem_active(void) +{ + return false; +} +#endif + /* * Device power management */ diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 938d5c78b421..20a66bf9f465 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -83,7 +83,7 @@ bool hibernation_available(void) { return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION) && - !secretmem_active(); + !secretmem_active() && !cxl_mem_active(); } /** diff --git a/kernel/power/main.c b/kernel/power/main.c index 7e646079fbeb..3e6be1c33e0b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -127,7 +127,9 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *s = buf; suspend_state_t i; - for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { + if (i >= PM_SUSPEND_MEM && cxl_mem_active()) + continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; @@ -136,6 +138,7 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, else s += sprintf(s, "%s ", label); } + } /* Convert the last space to a newline if needed. */ if (s != buf) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 6fcdee7e87a5..827075944d28 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -236,7 +236,8 @@ EXPORT_SYMBOL_GPL(suspend_valid_only_mem); static bool sleep_state_supported(suspend_state_t state) { - return state == PM_SUSPEND_TO_IDLE || valid_state(state); + return state == PM_SUSPEND_TO_IDLE || + (valid_state(state) && !cxl_mem_active()); } static int platform_suspend_prepare(suspend_state_t state) -- cgit v1.2.3 From 26f89535a5bb17915a2e1062c3999a2ee797c7b0 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 13 Apr 2022 22:12:46 -0700 Subject: cxl/mbox: Use type __u32 for mailbox payload sizes Payload sizes for mailbox commands are expected to be positive values coming from userspace. The documentation correctly describes these as always unsigned values. The mailbox and send structures that support the mailbox commands however, use __s32 types for the payloads. Replace __s32 with __u32 in the mailbox and send command structures and update usages. Kernel users of the interface already block all negative values and there is no known ability for userspace to have grown a dependency on submitting negative values to the kernel. The known user of the IOCTL, the CXL command line interface (cxl-cli) already enforces positive size values. A Smatch warning of a signedness uncovered this issue. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/20220414051246.1244575-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 28 +++++++++++++++------------- include/uapi/linux/cxl_mem.h | 14 +++++++------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 8a8388599a85..d54a6d175fff 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -35,6 +35,7 @@ static bool cxl_raw_allow_all; .flags = _flags, \ } +#define CXL_VARIABLE_PAYLOAD ~0U /* * This table defines the supported mailbox commands for the driver. This table * is made up of a UAPI structure. Non-negative values as parameters in the @@ -44,26 +45,26 @@ static bool cxl_raw_allow_all; static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), #ifdef CONFIG_CXL_MEM_RAW_COMMANDS - CXL_CMD(RAW, ~0, ~0, 0), + CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0), #endif - CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(GET_FW_INFO, 0, 0x50, 0), CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0), - CXL_CMD(GET_LSA, 0x8, ~0, 0), + CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0), - CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0), - CXL_CMD(SET_LSA, ~0, 0, 0), + CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0), CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0), CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0), CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), - CXL_CMD(GET_POISON, 0x10, ~0, 0), + CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(INJECT_POISON, 0x8, 0, 0), CXL_CMD(CLEAR_POISON, 0x48, 0, 0), CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), - CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0), + CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0), }; /* @@ -187,9 +188,10 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, * Variable sized commands can't be validated and so it's up to the * caller to do that if they wish. */ - if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size) - return -EIO; - + if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) { + if (mbox_cmd.size_out != out_size) + return -EIO; + } return 0; } EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL); @@ -275,7 +277,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, } /* Prepare to handle a full payload for variable sized output */ - if (out_size < 0) + if (out_size == CXL_VARIABLE_PAYLOAD) mbox->size_out = cxlds->payload_size; else mbox->size_out = out_size; @@ -353,11 +355,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EBUSY; /* Check the input buffer is the expected size */ - if (info->size_in >= 0 && info->size_in != send_cmd->in.size) + if (info->size_in != send_cmd->in.size) return -ENOMEM; /* Check the output buffer is at least large enough */ - if (info->size_out >= 0 && send_cmd->out.size < info->size_out) + if (send_cmd->out.size < info->size_out) return -ENOMEM; *mem_cmd = (struct cxl_mem_command) { diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 8d206f27bb6d..c71021a2a9ed 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -68,8 +68,8 @@ static const struct { * struct cxl_command_info - Command information returned from a query. * @id: ID number for the command. * @flags: Flags that specify command behavior. - * @size_in: Expected input size, or -1 if variable length. - * @size_out: Expected output size, or -1 if variable length. + * @size_in: Expected input size, or ~0 if variable length. + * @size_out: Expected output size, or ~0 if variable length. * * Represents a single command that is supported by both the driver and the * hardware. This is returned as part of an array from the query ioctl. The @@ -78,7 +78,7 @@ static const struct { * * - @id = 10 * - @flags = 0 - * - @size_in = -1 + * - @size_in = ~0 * - @size_out = 0 * * See struct cxl_mem_query_commands. @@ -89,8 +89,8 @@ struct cxl_command_info { __u32 flags; #define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0) - __s32 size_in; - __s32 size_out; + __u32 size_in; + __u32 size_out; }; /** @@ -169,13 +169,13 @@ struct cxl_send_command { __u32 retval; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } in; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } out; -- cgit v1.2.3 From 280302f0e8f6919f0c591753ea21906d77797746 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 6 Apr 2022 18:09:15 -0700 Subject: cxl/mbox: Replace NULL check with IS_ERR() after vmemdup_user() vmemdup_user() returns an ERR_PTR() on failure. Use IS_ERR() to check the return value. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20220407010915.1211258-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index d54a6d175fff..731cb43b570e 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -265,7 +265,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, if (in_size) { mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), in_size); - if (!mbox->payload_in) + if (IS_ERR(mbox->payload_in)) return PTR_ERR(mbox->payload_in); if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { -- cgit v1.2.3 From 35e01667c84b1e16060020c3a13099447e61c822 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Apr 2022 12:38:25 +0300 Subject: cxl/mbox: fix logical vs bitwise typo This should be bitwise & instead of &&. Fixes: 6179045ccc0c ("cxl/mbox: Block immediate mode in SET_PARTITION_INFO command") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YmpgkbbQ1Yxu36uO@kili Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 731cb43b570e..54f434733b56 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -243,7 +243,7 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) case CXL_MBOX_OP_SET_PARTITION_INFO: { struct cxl_mbox_set_partition_info *pi = payload_in; - if (pi->flags && CXL_SET_PARTITION_IMMEDIATE_FLAG) + if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) return false; break; } -- cgit v1.2.3 From cc10eee95204579fcd66fd5965073fdcbf629676 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 13 Apr 2022 01:36:16 -0600 Subject: PCI/ACPI: add a helper for retrieving _OSC Control DWORDs During _OSC negotiation, when the 'Control' DWORD is needed from the result buffer after running _OSC, a couple of places performed manual pointer arithmetic to offset into the right spot in the raw buffer. Add a acpi_osc_ctx_get_pci_control() helper to use the #define'd DWORD offsets to fetch the DWORDs needed from @acpi_osc_context, and replace the above instances of the open-coded arithmetic. Cc: "Rafael J. Wysocki" Suggested-by: Davidlohr Bueso Acked-by: Rafael J. Wysocki Reviewed-by: Rafael J. Wysocki Reviewed-by: Davidlohr Bueso Reviewed by: Adam Manzanares Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-2-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/bus.c | 2 +- drivers/acpi/pci_root.c | 2 +- include/linux/acpi.h | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 3e58b613a2c4..7658acbbb2bd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -425,7 +425,7 @@ static void acpi_bus_osc_negotiate_usb_control(void) } osc_sb_native_usb4_control = - control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD]; + control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 6f9e75d14808..57be89cb3966 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -183,7 +183,7 @@ static acpi_status acpi_pci_run_osc(acpi_handle handle, status = acpi_run_osc(handle, &context); if (ACPI_SUCCESS(status)) { - *retval = *((u32 *)(context.ret.pointer + 8)); + *retval = acpi_osc_ctx_get_pci_control(&context); kfree(context.ret.pointer); } return status; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d7136d13aa44..04e5a038dd57 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -608,6 +608,13 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1004,6 +1011,12 @@ static inline int acpi_register_wakeup_handler(int wake_irq, static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } +struct acpi_osc_context; +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3 From 241d26bc26add2e2867c546f7474902406d37c60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 13 Apr 2022 01:36:17 -0600 Subject: PCI/ACPI: Prefer CXL _OSC instead of PCIe _OSC for CXL host bridges OB In preparation for negotiating OS control of CXL _OSC features, do the minimal enabling to use CXL _OSC to handle the base PCIe feature negotiation. Recall that CXL _OSC is a super-set of PCIe _OSC and the CXL 2.0 specification mandates: "If a CXL Host Bridge device exposes CXL _OSC, CXL aware OSPM shall evaluate CXL _OSC and not evaluate PCIe _OSC." Rather than pass a boolean flag alongside @root to all the helper functions that need to consider PCIe specifics, add is_pcie() and is_cxl() helper functions to check the flavor of @root. This also allows for dynamic fallback to PCIe _OSC in cases where an attempt to use CXL _OXC fails. This can happen on CXL 1.1 platforms that publish ACPI0016 devices to indicate CXL host bridges, but do not publish the optional CXL _OSC method. CXL _OSC is mandatory for CXL 2.0 hosts. Cc: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Robert Moore Reviewed-by: Jonathan Cameron Reviewed-by: Rafael J. Wysocki Reviewed-by: Davidlohr Bueso Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-3-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 67 ++++++++++++++++++++++++++++++++++++++----------- include/acpi/acpi_bus.h | 6 +++++ include/linux/acpi.h | 4 +++ 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 57be89cb3966..1e683c115ee0 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -168,20 +168,45 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static inline bool is_pcie(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; +} + +static inline bool is_cxl(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_CXL; +} + static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; +static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"; + +static char *to_uuid(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return cxl_osc_uuid_str; + return pci_osc_uuid_str; +} + +static int cap_length(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS; + return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS; +} -static acpi_status acpi_pci_run_osc(acpi_handle handle, +static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, const u32 *capbuf, u32 *retval) { struct acpi_osc_context context = { - .uuid_str = pci_osc_uuid_str, + .uuid_str = to_uuid(root), .rev = 1, - .cap.length = 12, + .cap.length = cap_length(root), .cap.pointer = (void *)capbuf, }; acpi_status status; - status = acpi_run_osc(handle, &context); + status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { *retval = acpi_osc_ctx_get_pci_control(&context); kfree(context.ret.pointer); @@ -194,7 +219,7 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 *control) { acpi_status status; - u32 result, capbuf[3]; + u32 result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -202,10 +227,18 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; - status = acpi_pci_run_osc(root->device->handle, capbuf, &result); +retry: + status = acpi_pci_run_osc(root, capbuf, &result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; *control = result; + } else if (is_cxl(root)) { + /* + * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC + * upon any failure using CXL _OSC. + */ + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + goto retry; } return status; } @@ -336,7 +369,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[3]; + u32 ctrl, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -373,7 +406,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(handle, capbuf, mask); + status = acpi_pci_run_osc(root, capbuf, mask); if (ACPI_FAILURE(status)) return status; @@ -452,8 +485,7 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) return true; } -static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, - bool is_pcie) +static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; acpi_status status; @@ -504,7 +536,7 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, *no_aspm = 1; /* _OSC is optional for PCI host bridges */ - if ((status == AE_NOT_FOUND) && !is_pcie) + if (status == AE_NOT_FOUND && !is_pcie(root)) return; if (control) { @@ -527,7 +559,7 @@ static int acpi_pci_root_add(struct acpi_device *device, acpi_handle handle = device->handle; int no_aspm = 0; bool hotadd = system_state == SYSTEM_RUNNING; - bool is_pcie; + const char *acpi_hid; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -585,8 +617,15 @@ static int acpi_pci_root_add(struct acpi_device *device, root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle); - is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0; - negotiate_os_control(root, &no_aspm, is_pcie); + acpi_hid = acpi_device_hid(root->device); + if (strcmp(acpi_hid, "PNP0A08") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + else if (strcmp(acpi_hid, "ACPI0016") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_CXL; + else + dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n"); + + negotiate_os_control(root, &no_aspm); /* * TBD: Need PCI interface for enumeration/configuration of roots. diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c4b78c21d793..305ebf2a3fa7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -582,10 +582,16 @@ int unregister_acpi_bus_type(struct acpi_bus_type *); int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); +enum acpi_bridge_type { + ACPI_BRIDGE_TYPE_PCIE = 1, + ACPI_BRIDGE_TYPE_CXL, +}; + struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; + int bridge_type; struct resource secondary; /* downstream bus range */ u32 osc_support_set; /* _OSC state of support bits */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 04e5a038dd57..82d91d9ccce5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -550,6 +550,10 @@ struct acpi_osc_context { acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); +/* Number of _OSC capability DWORDS depends on bridge type */ +#define OSC_PCI_CAPABILITY_DWORDS 3 +#define OSC_CXL_CAPABILITY_DWORDS 5 + /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ -- cgit v1.2.3 From 56368029d93bbb3246ee2e03268fa6dd9754be05 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 13 Apr 2022 01:36:18 -0600 Subject: PCI/ACPI: negotiate CXL _OSC Add full support for negotiating _OSC as defined in the CXL 2.0 spec, as applicable to CXL-enabled platforms. Advertise support for the CXL features we support - 'CXL 2.0 port/device register access', 'Protocol Error Reporting', and 'CXL Native Hot Plug'. Request control for 'CXL Memory Error Reporting'. The requests are dependent on CONFIG_* based prerequisites, and prior PCI enabling, similar to how the standard PCI _OSC bits are determined. The CXL specification does not define any additional constraints on the hotplug flow beyond PCIe native hotplug, so a kernel that supports native PCIe hotplug, supports CXL hotplug. For error handling protocol and link errors just use PCIe AER. There is nascent support for amending AER events with CXL specific status [1], but there's otherwise no additional OS responsibility for CXL errors beyond PCIe AER. CXL Memory Errors behave the same as typical memory errors so CONFIG_MEMORY_FAILURE is sufficient to indicate support to platform firmware. [1]: https://lore.kernel.org/linux-cxl/164740402242.3912056.8303625392871313860.stgit@dwillia2-desk3.amr.corp.intel.com/ Cc: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Robert Moore Cc: Dan Williams Reviewed-by: Rafael J. Wysocki Reviewed-by: Davidlohr Bueso Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-4-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 179 +++++++++++++++++++++++++++++++++++++++++++----- include/acpi/acpi_bus.h | 6 +- include/linux/acpi.h | 25 ++++++- 3 files changed, 188 insertions(+), 22 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 1e683c115ee0..c82ad63fffed 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -140,6 +140,17 @@ static struct pci_osc_bit_struct pci_osc_control_bit[] = { { OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" }, }; +static struct pci_osc_bit_struct cxl_osc_support_bit[] = { + { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" }, + { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" }, + { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" }, + { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" }, +}; + +static struct pci_osc_bit_struct cxl_osc_control_bit[] = { + { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" }, +}; + static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word, struct pci_osc_bit_struct *table, int size) { @@ -168,6 +179,18 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_support_bit, + ARRAY_SIZE(cxl_osc_support_bit)); +} + +static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_control_bit, + ARRAY_SIZE(cxl_osc_control_bit)); +} + static inline bool is_pcie(struct acpi_pci_root *root) { return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; @@ -196,7 +219,8 @@ static int cap_length(struct acpi_pci_root *root) } static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, - const u32 *capbuf, u32 *retval) + const u32 *capbuf, u32 *pci_control, + u32 *cxl_control) { struct acpi_osc_context context = { .uuid_str = to_uuid(root), @@ -208,18 +232,20 @@ static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { - *retval = acpi_osc_ctx_get_pci_control(&context); + *pci_control = acpi_osc_ctx_get_pci_control(&context); + if (is_cxl(root)) + *cxl_control = acpi_osc_ctx_get_cxl_control(&context); kfree(context.ret.pointer); } return status; } -static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, - u32 support, - u32 *control) +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support, + u32 *control, u32 cxl_support, + u32 *cxl_control) { acpi_status status; - u32 result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; + u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -227,11 +253,21 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; + if (is_cxl(root)) { + cxl_support |= root->osc_ext_support_set; + capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support; + capbuf[OSC_EXT_CONTROL_DWORD] = *cxl_control | root->osc_ext_control_set; + } + retry: - status = acpi_pci_run_osc(root, capbuf, &result); + status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - *control = result; + *control = pci_result; + if (is_cxl(root)) { + root->osc_ext_support_set = cxl_support; + *cxl_control = cxl_result; + } } else if (is_cxl(root)) { /* * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC @@ -354,6 +390,8 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). * @mask: Mask of _OSC bits to request control of, place to store control mask. * @support: _OSC supported capability. + * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask. + * @cxl_support: CXL _OSC supported capability. * * Run _OSC query for @mask and if that is successful, compare the returned * mask of control bits with @req. If all of the @req bits are set in the @@ -364,12 +402,14 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * _OSC bits the BIOS has granted control of, but its contents are meaningless * on failure. **/ -static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support) +static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, + u32 support, u32 *cxl_mask, + u32 cxl_support) { u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[OSC_CXL_CAPABILITY_DWORDS]; + u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -381,20 +421,42 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s ctrl = *mask; *mask |= root->osc_control_set; + if (is_cxl(root)) { + cxl_ctrl = *cxl_mask; + *cxl_mask |= root->osc_ext_control_set; + } + /* Need to check the available controls bits before requesting them. */ do { - status = acpi_pci_query_osc(root, support, mask); + u32 pci_missing = 0, cxl_missing = 0; + + status = acpi_pci_query_osc(root, support, mask, cxl_support, + cxl_mask); if (ACPI_FAILURE(status)) return status; - if (ctrl == *mask) - break; - decode_osc_control(root, "platform does not support", - ctrl & ~(*mask)); + if (is_cxl(root)) { + if (ctrl == *mask && cxl_ctrl == *cxl_mask) + break; + pci_missing = ctrl & ~(*mask); + cxl_missing = cxl_ctrl & ~(*cxl_mask); + } else { + if (ctrl == *mask) + break; + pci_missing = ctrl & ~(*mask); + } + if (pci_missing) + decode_osc_control(root, "platform does not support", + pci_missing); + if (cxl_missing) + decode_cxl_osc_control(root, "CXL platform does not support", + cxl_missing); ctrl = *mask; - } while (*mask); + cxl_ctrl = *cxl_mask; + } while (*mask || *cxl_mask); /* No need to request _OSC if the control was already granted. */ - if ((root->osc_control_set & ctrl) == ctrl) + if ((root->osc_control_set & ctrl) == ctrl && + (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl) return AE_OK; if ((ctrl & req) != req) { @@ -406,11 +468,17 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(root, capbuf, mask); + if (is_cxl(root)) { + capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set; + capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl; + } + + status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask); if (ACPI_FAILURE(status)) return status; root->osc_control_set = *mask; + root->osc_ext_control_set = *cxl_mask; return AE_OK; } @@ -436,6 +504,53 @@ static u32 calculate_support(void) return support; } +/* + * Background on hotplug support, and making it depend on only + * CONFIG_HOTPLUG_PCI_PCIE vs. also considering CONFIG_MEMORY_HOTPLUG: + * + * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but + * there is no existing _OSC for memory hotplug support. The reason is that + * ACPI memory hotplug requires the OS to acknowledge / coordinate with + * memory plug events via a scan handler. On the CXL side the equivalent + * would be if Linux supported the Mechanical Retention Lock [1], or + * otherwise had some coordination for the driver of a PCI device + * undergoing hotplug to be consulted on whether the hotplug should + * proceed or not. + * + * The concern is that if Linux says no to supporting CXL hotplug then + * the BIOS may say no to giving the OS hotplug control of any other PCIe + * device. So the question here is not whether hotplug is enabled, it's + * whether it is handled natively by the at all OS, and if + * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes". + * + * Otherwise, the plan for CXL coordinated remove, since the kernel does + * not support blocking hotplug, is to require the memory device to be + * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is + * disabled that step will fail and the remove attempt cancelled by the + * user. If that is not honored and the card is removed anyway then it + * does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will + * cause a crash and other badness. + * + * Therefore, just say yes to CXL hotplug and require removal to + * be coordinated by userspace unless and until the kernel grows better + * mechanisms for doing "managed" removal of devices in consultation with + * the driver. + * + * [1]: https://lore.kernel.org/all/20201122014203.4706-1-ashok.raj@intel.com/ + */ +static u32 calculate_cxl_support(void) +{ + u32 support; + + support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + if (pci_aer_available()) + support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + support |= OSC_CXL_NATIVE_HP_SUPPORT; + + return support; +} + static u32 calculate_control(void) { u32 control; @@ -467,6 +582,16 @@ static u32 calculate_control(void) return control; } +static u32 calculate_cxl_control(void) +{ + u32 control = 0; + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) + control |= OSC_CXL_ERROR_REPORTING_CONTROL; + + return control; +} + static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) { struct acpi_device *device = root->device; @@ -488,6 +613,7 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; + u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0; acpi_status status; struct acpi_device *device = root->device; acpi_handle handle = device->handle; @@ -511,10 +637,20 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) if (os_control_query_checks(root, support)) requested = control = calculate_control(); - status = acpi_pci_osc_control_set(handle, &control, support); + if (is_cxl(root)) { + cxl_support = calculate_cxl_support(); + decode_cxl_osc_support(root, "OS supports", cxl_support); + cxl_requested = cxl_control = calculate_cxl_control(); + } + + status = acpi_pci_osc_control_set(handle, &control, support, + &cxl_control, cxl_support); if (ACPI_SUCCESS(status)) { if (control) decode_osc_control(root, "OS now controls", control); + if (cxl_control) + decode_cxl_osc_control(root, "OS now controls", + cxl_control); if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { /* @@ -543,6 +679,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) decode_osc_control(root, "OS requested", requested); decode_osc_control(root, "platform willing to grant", control); } + if (cxl_control) { + decode_cxl_osc_control(root, "OS requested", cxl_requested); + decode_cxl_osc_control(root, "platform willing to grant", + cxl_control); + } dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n", acpi_format_exception(status)); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 305ebf2a3fa7..4c463ae2777b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -594,8 +594,10 @@ struct acpi_pci_root { int bridge_type; struct resource secondary; /* downstream bus range */ - u32 osc_support_set; /* _OSC state of support bits */ - u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_ext_support_set; /* _OSC state of extended support bits */ + u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 82d91d9ccce5..378a431666b3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -554,10 +554,12 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_PCI_CAPABILITY_DWORDS 3 #define OSC_CXL_CAPABILITY_DWORDS 5 -/* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ +/* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ +#define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ +#define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ @@ -612,6 +614,15 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +/* CXL _OSC: Capabilities DWORD 4: Support Field */ +#define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 +#define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 +#define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 +#define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 + +/* CXL _OSC: Capabilities DWORD 5: Control Field */ +#define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 + static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; @@ -619,6 +630,13 @@ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) return ret[OSC_CONTROL_DWORD]; } +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_EXT_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1021,6 +1039,11 @@ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) return 0; } +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3 From 3750d013182b071dbf458144540390de0031be8c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:13 -0700 Subject: cxl: Replace lockdep_mutex with local lock classes In response to an attempt to expand dev->lockdep_mutex for device_lock() validation [1], Peter points out [2] that the lockdep API already has the ability to assign a dedicated lock class per subsystem device-type. Use lockdep_set_class() to override the default device_lock() '__lockdep_no_validate__' class for each CXL subsystem device-type. This enables lockdep to detect deadlocks and recursive locking within the device-driver core and the subsystem. The lockdep_set_class_and_subclass() API is used for port objects that recursively lock the 'cxl_port_key' class by hierarchical topology depth. Link: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com [1] Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [2] Suggested-by: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055519317.3745911.7342499516839702840.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 3 +++ drivers/cxl/core/pmem.c | 6 ++++++ drivers/cxl/core/port.c | 13 +++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1f76b28f9826..f7cdcd33504a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -228,6 +228,8 @@ static void detach_memdev(struct work_struct *work) put_device(&cxlmd->dev); } +static struct lock_class_key cxl_memdev_key; + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -247,6 +249,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev = &cxlmd->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_memdev_key); dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8de240c4d96b..e825e261278d 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -80,6 +80,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); +static struct lock_class_key cxl_nvdimm_bridge_key; + static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -99,6 +101,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) cxl_nvb->port = port; cxl_nvb->state = CXL_NVB_NEW; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; @@ -214,6 +217,8 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL); +static struct lock_class_key cxl_nvdimm_key; + static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) { struct cxl_nvdimm *cxl_nvd; @@ -226,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); dev->parent = &cxlmd->dev; dev->bus = &cxl_bus_type; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2ab1ba4499b3..750aac95ed5f 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -391,6 +391,8 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) return devm_add_action_or_reset(host, cxl_unlink_uport, port); } +static struct lock_class_key cxl_port_key; + static struct cxl_port *cxl_port_alloc(struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port) @@ -415,9 +417,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, * description. */ dev = &port->dev; - if (parent_port) + if (parent_port) { dev->parent = &parent_port->dev; - else + port->depth = parent_port->depth + 1; + } else dev->parent = uport; port->uport = uport; @@ -427,6 +430,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, INIT_LIST_HEAD(&port->endpoints); device_initialize(dev); + lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_port_type; @@ -457,8 +461,6 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, if (IS_ERR(port)) return port; - if (parent_port) - port->depth = parent_port->depth + 1; dev = &port->dev; if (is_cxl_memdev(uport)) rc = dev_set_name(dev, "endpoint%d", port->id); @@ -1173,6 +1175,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, return rc; } +static struct lock_class_key cxl_decoder_key; + /** * cxl_decoder_alloc - Allocate a new CXL decoder * @port: owning port of this decoder @@ -1214,6 +1218,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, seqlock_init(&cxld->target_lock); dev = &cxld->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_decoder_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; -- cgit v1.2.3 From d864b8ea6468cf1dce614a58eec92a23d8e07fec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 26 Apr 2022 12:22:44 -0700 Subject: cxl/acpi: Add root device lockdep validation The CXL "root" device, ACPI0017, is an attach point for coordinating platform level CXL resources and is the parent device for a CXL port topology tree. As such it has distinct locking rules relative to other CXL subsystem objects, but because it is an ACPI device the lock class is established well before it is given to the cxl_acpi driver. However, the lockdep API does support changing the lock class "live" for situations like this. Add a device_lock_set_class() helper that a driver can use in ->probe() to set a custom lock class, and device_lock_reset_class() to return to the default "no validate" class before the custom lock class key goes out of scope after ->remove(). Note the helpers are all macros to support dead code elimination in the CONFIG_PROVE_LOCKING=n case, however device_set_lock_class() still needs #ifdef CONFIG_PROVE_LOCKING since lockdep_match_class() explicitly does not have a helper in the CONFIG_PROVE_LOCKING=n case (see comment in lockdep.h). The lockdep API needs 2 small tweaks to prevent "unused" warnings for the @key argument to lock_set_class(), and a new lock_set_novalidate_class() is added to supplement lockdep_set_novalidate_class() in the cases where the lock class is converted while the lock is held. Suggested-by: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Alison Schofield Cc: Vishal Verma Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Greg Kroah-Hartman Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165100081305.1528964.11138612430659737238.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 13 +++++++++++++ include/linux/device.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/lockdep.h | 6 +++++- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d15a6aec0331..40286f5df812 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -275,6 +275,13 @@ static int add_root_nvdimm_bridge(struct device *match, void *data) return 1; } +static struct lock_class_key cxl_root_key; + +static void cxl_acpi_lock_reset_class(void *dev) +{ + device_lock_reset_class(dev); +} + static int cxl_acpi_probe(struct platform_device *pdev) { int rc; @@ -283,6 +290,12 @@ static int cxl_acpi_probe(struct platform_device *pdev) struct acpi_device *adev = ACPI_COMPANION(host); struct cxl_cfmws_context ctx; + device_lock_set_class(&pdev->dev, &cxl_root_key); + rc = devm_add_action_or_reset(&pdev->dev, cxl_acpi_lock_reset_class, + &pdev->dev); + if (rc) + return rc; + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); diff --git a/include/linux/device.h b/include/linux/device.h index 93459724dcde..833b0b3b0193 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -850,6 +850,49 @@ static inline bool device_supports_offline(struct device *dev) return dev->bus && dev->bus->offline && dev->bus->online; } +#define __device_lock_set_class(dev, name, key) \ +do { \ + struct device *__d2 __maybe_unused = dev; \ + lock_set_class(&__d2->mutex.dep_map, name, key, 0, _THIS_IP_); \ +} while (0) + +/** + * device_lock_set_class - Specify a temporary lock class while a device + * is attached to a driver + * @dev: device to modify + * @key: lock class key data + * + * This must be called with the device_lock() already held, for example + * from driver ->probe(). Take care to only override the default + * lockdep_no_validate class. + */ +#ifdef CONFIG_LOCKDEP +#define device_lock_set_class(dev, key) \ +do { \ + struct device *__d = dev; \ + dev_WARN_ONCE(__d, !lockdep_match_class(&__d->mutex, \ + &__lockdep_no_validate__), \ + "overriding existing custom lock class\n"); \ + __device_lock_set_class(__d, #key, key); \ +} while (0) +#else +#define device_lock_set_class(dev, key) __device_lock_set_class(dev, #key, key) +#endif + +/** + * device_lock_reset_class - Return a device to the default lockdep novalidate state + * @dev: device to modify + * + * This must be called with the device_lock() already held, for example + * from driver ->remove(). + */ +#define device_lock_reset_class(dev) \ +do { \ + struct device *__d __maybe_unused = dev; \ + lock_set_novalidate_class(&__d->mutex.dep_map, "&dev->mutex", \ + _THIS_IP_); \ +} while (0) + void lock_device_hotplug(void); void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..43b0dc6a0b21 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -290,6 +290,9 @@ extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); +#define lock_set_novalidate_class(l, n, i) \ + lock_set_class(l, n, &__lockdep_no_validate__, 0, i) + static inline void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, unsigned long ip) { @@ -357,7 +360,8 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) -# define lock_set_class(l, n, k, s, i) do { } while (0) +# define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0) +# define lock_set_novalidate_class(l, n, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ -- cgit v1.2.3 From 38a34e10768c85d3be4bb31fea5d8942bb72bbd7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:23 -0700 Subject: cxl: Drop cxl_device_lock() Now that all CXL subsystem locking is validated with custom lock classes, there is no need for the custom usage of the lockdep_mutex. Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055520383.3745911.53447786039115271.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 4 +-- drivers/cxl/core/port.c | 55 +++++++++++++++------------------- drivers/cxl/cxl.h | 78 ------------------------------------------------- drivers/cxl/mem.c | 4 +-- drivers/cxl/pmem.c | 12 ++++---- lib/Kconfig.debug | 6 ---- 6 files changed, 33 insertions(+), 126 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index e825e261278d..bec7cfb54ebf 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -124,10 +124,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. */ - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 750aac95ed5f..ea60abda6500 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -312,10 +312,10 @@ static void cxl_port_release(struct device *dev) struct cxl_port *port = to_cxl_port(dev); struct cxl_ep *ep, *_e; - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry_safe(ep, _e, &port->endpoints, list) cxl_ep_release(ep); - cxl_device_unlock(dev); + device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -556,7 +556,7 @@ static int match_root_child(struct device *dev, const void *match) return 0; port = to_cxl_port(dev); - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry(dport, &port->dports, list) { iter = match; while (iter) { @@ -566,7 +566,7 @@ static int match_root_child(struct device *dev, const void *match) } } out: - cxl_device_unlock(dev); + device_unlock(dev); return !!iter; } @@ -625,13 +625,13 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) static void cond_cxl_root_lock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_lock(&port->dev); + device_lock(&port->dev); } static void cond_cxl_root_unlock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); } static void cxl_dport_remove(void *data) @@ -738,15 +738,15 @@ static int add_ep(struct cxl_port *port, struct cxl_ep *new) { struct cxl_ep *dup; - cxl_device_lock(&port->dev); + device_lock(&port->dev); if (port->dead) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return -ENXIO; } dup = find_ep(port, new->ep); if (!dup) list_add_tail(&new->list, &port->endpoints); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dup ? -EEXIST : 0; } @@ -856,12 +856,12 @@ static void delete_endpoint(void *data) goto out; parent = &parent_port->dev; - cxl_device_lock(parent); + device_lock(parent); if (parent->driver && endpoint->uport) { devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } - cxl_device_unlock(parent); + device_unlock(parent); put_device(parent); out: put_device(&endpoint->dev); @@ -922,7 +922,7 @@ static void cxl_detach_ep(void *data) } parent_port = to_cxl_port(port->dev.parent); - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { /* * The bottom-up race to delete the port lost to a @@ -930,12 +930,12 @@ static void cxl_detach_ep(void *data) * parent_port ->remove() will have cleaned up all * descendants. */ - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); put_device(&port->dev); continue; } - cxl_device_lock(&port->dev); + device_lock(&port->dev); ep = find_ep(port, &cxlmd->dev); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); @@ -950,7 +950,7 @@ static void cxl_detach_ep(void *data) port->dead = true; list_splice_init(&port->dports, &reap_dports); } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); if (!list_empty(&reap_dports)) { dev_dbg(&cxlmd->dev, "delete %s\n", @@ -958,7 +958,7 @@ static void cxl_detach_ep(void *data) delete_switch_port(port, &reap_dports); } put_device(&port->dev); - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); } } @@ -1006,7 +1006,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return -EAGAIN; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_warn(&cxlmd->dev, "port %s:%s disabled, failed to enumerate CXL.mem\n", @@ -1024,7 +1024,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, get_device(&port->dev); } out: - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); if (IS_ERR(port)) rc = PTR_ERR(port); @@ -1135,14 +1135,14 @@ struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, { struct cxl_dport *dport; - cxl_device_lock(&port->dev); + device_lock(&port->dev); list_for_each_entry(dport, &port->dports, list) if (dport->dport == dev) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dport; } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return NULL; } EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); @@ -1384,9 +1384,9 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); - cxl_device_lock(&port->dev); + device_lock(&port->dev); rc = cxl_decoder_add_locked(cxld, target_map); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return rc; } @@ -1457,14 +1457,7 @@ static int cxl_bus_probe(struct device *dev) { int rc; - /* - * Take the CXL nested lock since the driver core only holds - * @dev->mutex and not @dev->lockdep_mutex. - */ - cxl_nested_lock(dev); rc = to_cxl_drv(dev->driver)->probe(dev); - cxl_nested_unlock(dev); - dev_dbg(dev, "probe: %d\n", rc); return rc; } @@ -1473,10 +1466,8 @@ static void cxl_bus_remove(struct device *dev) { struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); - cxl_nested_lock(dev); if (cxl_drv->remove) cxl_drv->remove(dev); - cxl_nested_unlock(dev); } static struct workqueue_struct *cxl_bus_wq; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 990b6670222e..140dc3278cde 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -405,82 +405,4 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #define __mock static #endif -#ifdef CONFIG_PROVE_CXL_LOCKING -enum cxl_lock_class { - CXL_ANON_LOCK, - CXL_NVDIMM_LOCK, - CXL_NVDIMM_BRIDGE_LOCK, - CXL_PORT_LOCK, - /* - * Be careful to add new lock classes here, CXL_PORT_LOCK is - * extended by the port depth, so a maximum CXL port topology - * depth would need to be defined first. - */ -}; - -static inline void cxl_nested_lock(struct device *dev) -{ - if (is_cxl_port(dev)) { - struct cxl_port *port = to_cxl_port(dev); - - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth); - } else if (is_cxl_decoder(dev)) { - struct cxl_port *port = to_cxl_port(dev->parent); - - /* - * A decoder is the immediate child of a port, so set - * its lock class equal to other child device siblings. - */ - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth + 1); - } else if (is_cxl_nvdimm_bridge(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); - else if (is_cxl_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); - else - mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); -} - -static inline void cxl_nested_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void cxl_device_lock(struct device *dev) -{ - /* - * For double lock errors the lockup will happen before lockdep - * warns at cxl_nested_lock(), so assert explicitly. - */ - lockdep_assert_not_held(&dev->lockdep_mutex); - - device_lock(dev); - cxl_nested_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - cxl_nested_unlock(dev); - device_unlock(dev); -} -#else -static inline void cxl_nested_lock(struct device *dev) -{ -} - -static inline void cxl_nested_unlock(struct device *dev) -{ -} - -static inline void cxl_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 1bd2e0b67f59..401b0fbe21db 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -187,7 +187,7 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_err(dev, "CXL port topology %s not enabled\n", dev_name(&parent_port->dev)); @@ -197,7 +197,7 @@ static int cxl_mem_probe(struct device *dev) rc = create_endpoint(cxlmd, parent_port); out: - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); put_device(&parent_port->dev); /* diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 404018dd99e6..bbeef91e637e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - cxl_device_lock(dev); + device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - cxl_device_unlock(dev); + device_unlock(dev); return 0; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 075cd25363ac..4a2c853c948b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1559,12 +1559,6 @@ config PROVE_NVDIMM_LOCKING help Enable lockdep to validate nd_device_lock() usage. -config PROVE_CXL_LOCKING - bool "CXL" - depends on CXL_BUS - help - Enable lockdep to validate cxl_device_lock() usage. - endchoice endmenu # lock debugging -- cgit v1.2.3 From 4a0079bc7aae5a003ecc548090b75c96d3abf490 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:29 -0700 Subject: nvdimm: Replace lockdep_mutex with local lock classes In response to an attempt to expand dev->lockdep_mutex for device_lock() validation [1], Peter points out [2] that the lockdep API already has the ability to assign a dedicated lock class per subsystem device-type. Use lockdep_set_class() to override the default device_lock() '__lockdep_no_validate__' class for each NVDIMM subsystem device-type. This enables lockdep to detect deadlocks and recursive locking within the device-driver core and the subsystem. Link: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com [1] Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [2] Suggested-by: Peter Zijlstra Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055520896.3745911.8021255583475547548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 7 +++++-- drivers/nvdimm/bus.c | 14 +++++++------- drivers/nvdimm/dax_devs.c | 4 ++-- drivers/nvdimm/dimm_devs.c | 4 ++++ drivers/nvdimm/namespace_devs.c | 10 +++++++++- drivers/nvdimm/nd-core.h | 2 +- drivers/nvdimm/pfn_devs.c | 7 +++++-- drivers/nvdimm/region_devs.c | 4 ++++ 8 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index e5a58520d398..120821796668 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -178,6 +178,8 @@ bool is_nd_btt(struct device *dev) } EXPORT_SYMBOL(is_nd_btt); +static struct lock_class_key nvdimm_btt_key; + static struct device *__nd_btt_create(struct nd_region *nd_region, unsigned long lbasize, uuid_t *uuid, struct nd_namespace_common *ndns) @@ -205,6 +207,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->parent = &nd_region->dev; dev->type = &nd_btt_device_type; device_initialize(&nd_btt->dev); + lockdep_set_class(&nd_btt->dev.mutex, &nvdimm_btt_key); if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -225,7 +228,7 @@ struct device *nd_btt_create(struct nd_region *nd_region) { struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -324,7 +327,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (!nd_btt->uuid) return -ENOMEM; - __nd_device_register(&nd_btt->dev); + nd_device_register(&nd_btt->dev); return 0; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7b0d1443217a..85ffa04e93c2 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -334,6 +334,8 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_to_bus); +static struct lock_class_key nvdimm_bus_key; + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nd_desc) { @@ -360,6 +362,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.bus = &nvdimm_bus_type; nvdimm_bus->dev.of_node = nd_desc->of_node; device_initialize(&nvdimm_bus->dev); + lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key); device_set_pm_not_required(&nvdimm_bus->dev); rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); if (rc) @@ -511,7 +514,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) put_device(dev); } -void __nd_device_register(struct device *dev) +void nd_device_register(struct device *dev) { if (!dev) return; @@ -537,12 +540,6 @@ void __nd_device_register(struct device *dev) async_schedule_dev_domain(nd_async_device_register, dev, &nd_async_domain); } - -void nd_device_register(struct device *dev) -{ - device_initialize(dev); - __nd_device_register(dev); -} EXPORT_SYMBOL(nd_device_register); void nd_device_unregister(struct device *dev, enum nd_async_mode mode) @@ -724,6 +721,8 @@ static void ndctl_release(struct device *dev) kfree(dev); } +static struct lock_class_key nvdimm_ndctl_key; + int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); @@ -734,6 +733,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) if (!dev) return -ENOMEM; device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_ndctl_key); device_set_pm_not_required(dev); dev->class = nd_class; dev->parent = &nvdimm_bus->dev; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 99965077bac4..7f4a9d28b670 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -80,7 +80,7 @@ struct device *nd_dax_create(struct nd_region *nd_region) nd_dax = nd_dax_alloc(nd_region); if (nd_dax) dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -119,7 +119,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); } else - __nd_device_register(dax_dev); + nd_device_register(dax_dev); return rc; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index ee507eed42b5..27b8f8cd1b48 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -570,6 +570,8 @@ bool is_nvdimm(struct device *dev) return dev->type == &nvdimm_device_type; } +static struct lock_class_key nvdimm_key; + struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, @@ -613,6 +615,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, /* get security state and extended (master) state */ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_key); nd_device_register(dev); return nvdimm; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 62b83b2e26e3..5197a813849d 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1830,6 +1830,8 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) return dev; } +static struct lock_class_key nvdimm_namespace_key; + void nd_region_create_ns_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); @@ -1845,8 +1847,12 @@ void nd_region_create_ns_seed(struct nd_region *nd_region) */ if (!nd_region->ns_seed) dev_err(&nd_region->dev, "failed to create namespace\n"); - else + else { + device_initialize(nd_region->ns_seed); + lockdep_set_class(&nd_region->ns_seed->mutex, + &nvdimm_namespace_key); nd_device_register(nd_region->ns_seed); + } } void nd_region_create_dax_seed(struct nd_region *nd_region) @@ -2200,6 +2206,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err) if (id < 0) break; dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_namespace_key); nd_device_register(dev); } if (i) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 448f9dcb4bb7..99b04106434b 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -106,7 +106,7 @@ void nd_region_create_dax_seed(struct nd_region *nd_region); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); void nd_synchronize(void); -void __nd_device_register(struct device *dev); +void nd_device_register(struct device *dev); struct nd_label_id; char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid, u32 flags); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index c31e184bfa45..0cd396d0024c 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -291,6 +291,8 @@ bool is_nd_pfn(struct device *dev) } EXPORT_SYMBOL(is_nd_pfn); +static struct lock_class_key nvdimm_pfn_key; + struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { @@ -303,6 +305,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, nd_pfn->align = nd_pfn_default_alignment(); dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); + lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -346,7 +349,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) nd_pfn = nd_pfn_alloc(nd_region); dev = nd_pfn_devinit(nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -643,7 +646,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); } else - __nd_device_register(pfn_dev); + nd_device_register(pfn_dev); return rc; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 0cb274c2b508..8650e8d39c89 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -949,6 +949,8 @@ static unsigned long default_align(struct nd_region *nd_region) return align; } +static struct lock_class_key nvdimm_region_key; + static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc, const struct device_type *dev_type, const char *caller) @@ -1035,6 +1037,8 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, else nd_region->flush = NULL; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_region_key); nd_device_register(dev); return nd_region; -- cgit v1.2.3 From 1550a17a7da2936281fda5e87fc454cee3c9c683 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:34 -0700 Subject: ACPI: NFIT: Drop nfit_device_lock() The nfit_device_lock() helper was added to provide lockdep coverage for the NFIT driver's usage of device_lock() on the nvdimm_bus object. Now that nvdimm_bus objects have their own lock class this wrapper can be dropped. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055521409.3745911.8085645201146909612.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 30 +++++++++++++++--------------- drivers/acpi/nfit/nfit.h | 24 ------------------------ 2 files changed, 15 insertions(+), 39 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fe61f617a943..ae5f4acf2675 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1230,7 +1230,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, if (rc) return rc; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); @@ -1247,7 +1247,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, break; } } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1267,10 +1267,10 @@ static ssize_t scrub_show(struct device *dev, ssize_t rc = -ENXIO; bool busy; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (!nd_desc) { - nfit_device_unlock(dev); + device_unlock(dev); return rc; } acpi_desc = to_acpi_desc(nd_desc); @@ -1287,7 +1287,7 @@ static ssize_t scrub_show(struct device *dev, } mutex_unlock(&acpi_desc->init_mutex); - nfit_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1304,14 +1304,14 @@ static ssize_t scrub_store(struct device *dev, if (val != 1) return -EINVAL; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1697,9 +1697,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) struct acpi_device *adev = data; struct device *dev = &adev->dev; - nfit_device_lock(dev->parent); + device_lock(dev->parent); __acpi_nvdimm_notify(dev, event); - nfit_device_unlock(dev->parent); + device_unlock(dev->parent); } static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) @@ -3152,8 +3152,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) struct device *dev = acpi_desc->dev; /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ - nfit_device_lock(dev); - nfit_device_unlock(dev); + device_lock(dev); + device_unlock(dev); /* Bounce the init_mutex to complete initial registration */ mutex_lock(&acpi_desc->init_mutex); @@ -3305,8 +3305,8 @@ void acpi_nfit_shutdown(void *data) * acpi_nfit_ars_rescan() submissions have had a chance to * either submit or see ->cancel set. */ - nfit_device_lock(bus_dev); - nfit_device_unlock(bus_dev); + device_lock(bus_dev); + device_unlock(bus_dev); flush_workqueue(nfit_wq); } @@ -3449,9 +3449,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify); static void acpi_nfit_notify(struct acpi_device *adev, u32 event) { - nfit_device_lock(&adev->dev); + device_lock(&adev->dev); __acpi_nfit_notify(&adev->dev, adev->handle, event); - nfit_device_unlock(&adev->dev); + device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 50882bdbeb96..6023ad61831a 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -337,30 +337,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc( return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); } -#ifdef CONFIG_PROVE_LOCKING -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); - mutex_lock(&dev->lockdep_mutex); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); - device_unlock(dev); -} -#else -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif - const guid_t *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); void acpi_nfit_shutdown(void *data); -- cgit v1.2.3 From 81beea55cb7407a52bac16f7d01a415e008c910f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:39 -0700 Subject: nvdimm: Drop nd_device_lock() Now that all NVDIMM subsystem locking is validated with custom lock classes, there is no need for the custom usage of the lockdep_mutex. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055521979.3745911.10751769706032029999.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 16 +++++----- drivers/nvdimm/bus.c | 24 ++++++--------- drivers/nvdimm/core.c | 10 +++---- drivers/nvdimm/dimm_devs.c | 8 ++--- drivers/nvdimm/namespace_devs.c | 36 +++++++++++----------- drivers/nvdimm/nd-core.h | 66 ----------------------------------------- drivers/nvdimm/pfn_devs.c | 24 +++++++-------- drivers/nvdimm/pmem.c | 2 +- drivers/nvdimm/region.c | 2 +- drivers/nvdimm/region_devs.c | 16 +++++----- lib/Kconfig.debug | 17 ----------- 11 files changed, 66 insertions(+), 155 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 120821796668..fabbb31f2c35 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -50,14 +50,14 @@ static ssize_t sector_size_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -79,11 +79,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -108,13 +108,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -126,14 +126,14 @@ static ssize_t size_show(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = sprintf(buf, "%llu\n", nd_btt->size); else { /* no size to convey if the btt instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 85ffa04e93c2..a4fc17db707c 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -88,10 +88,7 @@ static int nvdimm_bus_probe(struct device *dev) dev->driver->name, dev_name(dev)); nvdimm_bus_probe_start(nvdimm_bus); - debug_nvdimm_lock(dev); rc = nd_drv->probe(dev); - debug_nvdimm_unlock(dev); - if ((rc == 0 || rc == -EOPNOTSUPP) && dev->parent && is_nd_region(dev->parent)) nd_region_advance_seeds(to_nd_region(dev->parent), dev); @@ -111,11 +108,8 @@ static void nvdimm_bus_remove(struct device *dev) struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - if (nd_drv->remove) { - debug_nvdimm_lock(dev); + if (nd_drv->remove) nd_drv->remove(dev); - debug_nvdimm_unlock(dev); - } dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name, dev_name(dev)); @@ -139,7 +133,7 @@ static void nvdimm_bus_shutdown(struct device *dev) void nd_device_notify(struct device *dev, enum nvdimm_event event) { - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_device_driver *nd_drv; @@ -147,7 +141,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event) if (nd_drv->notify) nd_drv->notify(dev, event); } - nd_device_unlock(dev); + device_unlock(dev); } EXPORT_SYMBOL(nd_device_notify); @@ -569,9 +563,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode) * or otherwise let the async path handle it if the * unregistration was already queued. */ - nd_device_lock(dev); + device_lock(dev); killed = kill_device(dev); - nd_device_unlock(dev); + device_unlock(dev); if (!killed) return; @@ -930,10 +924,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) if (nvdimm_bus->probe_active == 0) break; nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); wait_event(nvdimm_bus->wait, nvdimm_bus->probe_active == 0); - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); } while (true); } @@ -1167,7 +1161,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, goto out; } - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); if (rc) @@ -1189,7 +1183,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, out_unlock: nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); out: kfree(in_env); kfree(out_env); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 69a03358817f..144926b7451c 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); * * Enforce that uuids can only be changed while the device is disabled * (driver detached) - * LOCKING: expects nd_device_lock() is held on entry + * LOCKING: expects device_lock() is held on entry */ int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf, size_t len) @@ -316,15 +316,15 @@ static DEVICE_ATTR_RO(provider); static int flush_namespaces(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); return 0; } static int flush_regions_dimms(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); device_for_each_child(dev, NULL, flush_namespaces); return 0; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 27b8f8cd1b48..c7c980577491 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -341,9 +341,9 @@ static ssize_t available_slots_show(struct device *dev, { ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = __available_slots_show(dev_get_drvdata(dev), buf); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -386,12 +386,12 @@ static ssize_t security_store(struct device *dev, * done while probing is idle and the DIMM is not in active use * in any region. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = nvdimm_security_store(dev, buf, len); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 5197a813849d..bf4f5c09d9b1 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -264,7 +264,7 @@ static ssize_t alt_name_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __alt_name_store(dev, buf, len); @@ -272,7 +272,7 @@ static ssize_t alt_name_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -846,7 +846,7 @@ static ssize_t size_store(struct device *dev, if (rc) return rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __size_store(dev, val); @@ -868,7 +868,7 @@ static ssize_t size_store(struct device *dev, dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1043,7 +1043,7 @@ static ssize_t uuid_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (to_ndns(dev)->claim) @@ -1059,7 +1059,7 @@ static ssize_t uuid_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1118,7 +1118,7 @@ static ssize_t sector_size_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (to_ndns(dev)->claim) rc = -EBUSY; @@ -1129,7 +1129,7 @@ static ssize_t sector_size_store(struct device *dev, dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote", buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -1239,9 +1239,9 @@ static ssize_t holder_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1278,7 +1278,7 @@ static ssize_t holder_class_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); int rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __holder_class_store(dev, buf); @@ -1286,7 +1286,7 @@ static ssize_t holder_class_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%d)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1297,7 +1297,7 @@ static ssize_t holder_class_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (ndns->claim_class == NVDIMM_CCLASS_NONE) rc = sprintf(buf, "\n"); else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || @@ -1309,7 +1309,7 @@ static ssize_t holder_class_show(struct device *dev, rc = sprintf(buf, "dax\n"); else rc = sprintf(buf, "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1323,7 +1323,7 @@ static ssize_t mode_show(struct device *dev, char *mode; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); claim = ndns->claim; if (claim && is_nd_btt(claim)) mode = "safe"; @@ -1336,7 +1336,7 @@ static ssize_t mode_show(struct device *dev, else mode = "raw"; rc = sprintf(buf, "%s\n", mode); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1456,8 +1456,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) * Flush any in-progess probes / removals in the driver * for the raw personality of this namespace. */ - nd_device_lock(&ndns->dev); - nd_device_unlock(&ndns->dev); + device_lock(&ndns->dev); + device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", dev_name(dev)); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 99b04106434b..cc86ee09d7c0 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -161,70 +161,4 @@ static inline void devm_nsio_disable(struct device *dev, { } #endif - -#ifdef CONFIG_PROVE_NVDIMM_LOCKING -extern struct class *nd_class; - -enum { - LOCK_BUS, - LOCK_NDCTL, - LOCK_REGION, - LOCK_DIMM = LOCK_REGION, - LOCK_NAMESPACE, - LOCK_CLAIM, -}; - -static inline void debug_nvdimm_lock(struct device *dev) -{ - if (is_nd_region(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION); - else if (is_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM); - else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM); - else if (dev->parent && (is_nd_region(dev->parent))) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE); - else if (is_nvdimm_bus(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS); - else if (dev->class && dev->class == nd_class) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL); - else - dev_WARN(dev, "unknown lock level\n"); -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); - debug_nvdimm_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - debug_nvdimm_unlock(dev); - device_unlock(dev); -} -#else -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - device_unlock(dev); -} - -static inline void debug_nvdimm_lock(struct device *dev) -{ -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ -} -#endif #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 0cd396d0024c..0e92ab4b3283 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -55,7 +55,7 @@ static ssize_t mode_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (dev->driver) rc = -EBUSY; @@ -77,7 +77,7 @@ static ssize_t mode_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -123,14 +123,14 @@ static ssize_t align_store(struct device *dev, unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, nd_pfn_supported_alignments(aligns)); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -152,11 +152,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -181,13 +181,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -199,7 +199,7 @@ static ssize_t resource_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -213,7 +213,7 @@ static ssize_t resource_show(struct device *dev, /* no address to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -225,7 +225,7 @@ static ssize_t size_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -241,7 +241,7 @@ static ssize_t size_show(struct device *dev, /* no size to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 58d95242a836..3992521c151f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -573,7 +573,7 @@ static void nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)); else { /* - * Note, this assumes nd_device_lock() context to not + * Note, this assumes device_lock() context to not * race nd_pmem_notify() */ sysfs_put(pmem->bb_state); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 188560b1c110..390123d293ea 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -95,7 +95,7 @@ static void nd_region_remove(struct device *dev) nvdimm_bus_unlock(dev); /* - * Note, this assumes nd_device_lock() context to not race + * Note, this assumes device_lock() context to not race * nd_region_notify() */ sysfs_put(nd_region->bb_state); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 8650e8d39c89..d976260eca7a 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -279,7 +279,7 @@ static ssize_t set_cookie_show(struct device *dev, * the v1.1 namespace label cookie definition. To read all this * data we need to wait for probing to settle. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (nd_region->ndr_mappings) { @@ -296,7 +296,7 @@ static ssize_t set_cookie_show(struct device *dev, } } nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); if (rc) return rc; @@ -353,12 +353,12 @@ static ssize_t available_size_show(struct device *dev, * memory nvdimm_bus_lock() is dropped, but that's userspace's * problem to not race itself. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_available_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -370,12 +370,12 @@ static ssize_t max_available_extent_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); unsigned long long available = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_allocatable_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -549,12 +549,12 @@ static ssize_t region_badblocks_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = badblocks_show(&nd_region->bb, buf, 0); else rc = -ENXIO; - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4a2c853c948b..cfe3b092c31d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1544,23 +1544,6 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. -choice - prompt "Lock debugging: prove subsystem device_lock() correctness" - depends on PROVE_LOCKING - help - For subsystems that have instrumented their usage of the device_lock() - with nested annotations, enable lock dependency checking. The locking - hierarchy 'subclass' identifiers are not compatible across - sub-systems, so only one can be enabled at a time. - -config PROVE_NVDIMM_LOCKING - bool "NVDIMM" - depends on LIBNVDIMM - help - Enable lockdep to validate nd_device_lock() usage. - -endchoice - endmenu # lock debugging config TRACE_IRQFLAGS -- cgit v1.2.3 From fd3abd2cafa46955846d731b9a6ded2c19ab73d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:45 -0700 Subject: device-core: Kill the lockdep_mutex Per Peter [1], the lockdep API has native support for all the use cases lockdep_mutex was attempting to enable. Now that all lockdep_mutex users have been converted to those APIs, drop this lock. Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [1] Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Suggested-by: Peter Zijlstra Reviewed-by: Ira Weiny Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/165055522548.3745911.14298368286915484086.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/base/core.c | 3 --- include/linux/device.h | 5 ----- 2 files changed, 8 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3d6430eb0c6a..2eede2ec3d64 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2864,9 +2864,6 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); -#ifdef CONFIG_PROVE_LOCKING - mutex_init(&dev->lockdep_mutex); -#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/include/linux/device.h b/include/linux/device.h index 833b0b3b0193..073f1b0126ac 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -400,8 +400,6 @@ struct dev_msi_info { * This identifies the device type and carries type-specific * information. * @mutex: Mutex to synchronize calls to its driver. - * @lockdep_mutex: An optional debug lock that a subsystem can use as a - * peer lock to gain localized lockdep coverage of the device_lock. * @bus: Type of bus device is on. * @driver: Which driver has allocated this * @platform_data: Platform data specific to the device. @@ -499,9 +497,6 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ -#ifdef CONFIG_PROVE_LOCKING - struct mutex lockdep_mutex; -#endif struct mutex mutex; /* mutex to synchronize calls to * its driver. */ -- cgit v1.2.3 From e6829d1bd3c4b58296ee9e412f7ed4d6cb390192 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 26 Apr 2022 13:23:05 -0700 Subject: nvdimm: Fix firmware activation deadlock scenarios Lockdep reports the following deadlock scenarios for CXL root device power-management, device_prepare(), operations, and device_shutdown() operations for 'nd_region' devices: Chain exists of: &nvdimm_region_key --> &nvdimm_bus->reconfig_mutex --> system_transition_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(system_transition_mutex); lock(&nvdimm_bus->reconfig_mutex); lock(system_transition_mutex); lock(&nvdimm_region_key); Chain exists of: &cxl_nvdimm_bridge_key --> acpi_scan_lock --> &cxl_root_key Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&cxl_root_key); lock(acpi_scan_lock); lock(&cxl_root_key); lock(&cxl_nvdimm_bridge_key); These stem from holding nvdimm_bus_lock() over hibernate_quiet_exec() which walks the entire system device topology taking device_lock() along the way. The nvdimm_bus_lock() is protecting against unregistration, multiple simultaneous ops callers, and preventing activate_show() from racing activate_store(). For the first 2, the lock is redundant. Unregistration already flushes all ops users, and sysfs already prevents multiple threads to be active in an ops handler at the same time. For the last userspace should already be waiting for its last activate_store() to complete, and does not need activate_show() to flush the write side, so this lock usage can be deleted in these attributes. Fixes: 48001ea50d17 ("PM, libnvdimm: Add runtime firmware activation support") Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165074883800.4116052.10737040861825806582.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 144926b7451c..d91799b71d23 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -368,9 +368,7 @@ static ssize_t capability_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); switch (cap) { case NVDIMM_FWA_CAP_QUIESCE: @@ -395,10 +393,8 @@ static ssize_t activate_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); state = nd_desc->fw_ops->activate_state(nd_desc); - nvdimm_bus_unlock(dev); if (cap < NVDIMM_FWA_CAP_QUIESCE) return -EOPNOTSUPP; @@ -443,7 +439,6 @@ static ssize_t activate_store(struct device *dev, else return -EINVAL; - nvdimm_bus_lock(dev); state = nd_desc->fw_ops->activate_state(nd_desc); switch (state) { @@ -461,7 +456,6 @@ static ssize_t activate_store(struct device *dev, default: rc = -ENXIO; } - nvdimm_bus_unlock(dev); if (rc == 0) rc = len; @@ -484,10 +478,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut if (!nd_desc->fw_ops) return 0; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); - if (cap < NVDIMM_FWA_CAP_QUIESCE) return 0; -- cgit v1.2.3 From 2bcf3bbd348fc10260aa6243ff6a22a1882b5b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:15 -0700 Subject: cxl/mem: Drop mem_enabled check from wait_for_media() Media ready is asserted by the device independent of whether mem_enabled was ever set. Drop this check to allow for dropping wait_for_media() in favor of ->wait_media_ready(). Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291685501.1426646.10372821863672431074.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 401b0fbe21db..c2d9dadf4a2e 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -27,12 +27,8 @@ static int wait_for_media(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_endpoint_dvsec_info *info = &cxlds->info; int rc; - if (!info->mem_enabled) - return -EBUSY; - rc = cxlds->wait_media_ready(cxlds); if (rc) return rc; -- cgit v1.2.3 From 1e14c9fbb55fbc48eb88b55d1736c994b1deb631 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:20 -0700 Subject: cxl/pci: Consolidate wait_for_media() and wait_for_media_ready() Now that wait_for_media() does nothing supplemental to wait_for_media_ready() just promote wait_for_media_ready() to a common helper and drop wait_for_media(). Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291686046.1426646.4390664747934592185.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 19 +------------------ drivers/cxl/pci.c | 4 ++-- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index c2d9dadf4a2e..7622cfefa1b0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -24,23 +24,6 @@ * in higher level operations. */ -static int wait_for_media(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - int rc; - - rc = cxlds->wait_media_ready(cxlds); - if (rc) - return rc; - - /* - * We know the device is active, and enabled, if any ranges are non-zero - * we'll need to check later before adding the port since that owns the - * HDM decoder registers. - */ - return 0; -} - static int create_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *parent_port) { @@ -157,7 +140,7 @@ static int cxl_mem_probe(struct device *dev) if (work_pending(&cxlmd->detach_work)) return -EBUSY; - rc = wait_for_media(cxlmd); + rc = cxlds->wait_media_ready(cxlds); if (rc) { dev_err(dev, "Media not active (%d)\n", rc); return rc; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index e7ab9a34d718..435f9f89b793 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -423,7 +423,7 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) * Wait up to @mbox_ready_timeout for the device to report memory * active. */ -static int wait_for_media_ready(struct cxl_dev_state *cxlds) +static int cxl_await_media_ready(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); int d = cxlds->cxl_dvsec; @@ -593,7 +593,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); - cxlds->wait_media_ready = wait_for_media_ready; + cxlds->wait_media_ready = cxl_await_media_ready; rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) -- cgit v1.2.3 From 194d5edadf0b403f6de2be89c484a01c83ee269f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:26 -0700 Subject: cxl/pci: Drop wait_for_valid() from cxl_await_media_ready() A check mem_info_valid already happens in __cxl_dvsec_ranges(). Rely on that instead of calling wait_for_valid again. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291686632.1426646.7479581732894574486.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 435f9f89b793..91b266911e52 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -431,10 +431,6 @@ static int cxl_await_media_ready(struct cxl_dev_state *cxlds) u64 md_status; int rc, i; - rc = wait_for_valid(cxlds); - if (rc) - return rc; - for (i = mbox_ready_timeout; i; i--) { u32 temp; -- cgit v1.2.3 From 76a4121e86649bf381aa32cb69ede913def57202 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:31 -0700 Subject: cxl/mem: Fix cxl_mem_probe() error exit The addition of cxl_mem_active() broke error exit scenarios for cxl_mem_probe(). Return early rather than proceed with disabling suspend, and update the label name since it is no longer a terminal "out" label that exits the function. Fixes: 9ea4dcf49878 ("PM: CXL: Disable suspend") Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291687176.1426646.15449254938752532784.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 7622cfefa1b0..184549e5093f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -171,13 +171,15 @@ static int cxl_mem_probe(struct device *dev) dev_err(dev, "CXL port topology %s not enabled\n", dev_name(&parent_port->dev)); rc = -ENXIO; - goto out; + goto unlock; } rc = create_endpoint(cxlmd, parent_port); -out: +unlock: device_unlock(&parent_port->dev); put_device(&parent_port->dev); + if (rc) + return rc; /* * The kernel may be operating out of CXL memory on this device, -- cgit v1.2.3 From 75b7ae29991f945b69c10d75b861d7d5e90bd541 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:37 -0700 Subject: cxl/mem: Validate port connectivity before dvsec ranges In preparation for validating DVSEC ranges against the platform declared CXL memory ranges (ACPI CFMWS) move port enumeration before the endpoint's decoder validation. Ultimately this logic will move to the port driver, but create a bisect point before that larger move. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291687749.1426646.18091538443879226995.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 184549e5093f..80e75a410499 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -140,22 +140,6 @@ static int cxl_mem_probe(struct device *dev) if (work_pending(&cxlmd->detach_work)) return -EBUSY; - rc = cxlds->wait_media_ready(cxlds); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - - /* - * If DVSEC ranges are being used instead of HDM decoder registers there - * is no use in trying to manage those. - */ - if (!cxl_hdm_decode_init(cxlds)) { - dev_err(dev, - "Legacy range registers configuration prevents HDM operation.\n"); - return -EBUSY; - } - rc = devm_cxl_enumerate_ports(cxlmd); if (rc) return rc; @@ -181,6 +165,22 @@ unlock: if (rc) return rc; + rc = cxlds->wait_media_ready(cxlds); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ + if (!cxl_hdm_decode_init(cxlds)) { + dev_err(dev, + "Legacy range registers configuration prevents HDM operation.\n"); + return -EBUSY; + } + /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device -- cgit v1.2.3 From 2e4ba0ec978335b4b550bbed95cb198ac3a00745 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:43 -0700 Subject: cxl/pci: Move cxl_await_media_ready() to the core Allow cxl_await_media_ready() to be mocked for testing purposes rather than carrying the maintenance burden of an indirect function call in the mainline driver. With the move cxl_await_media_ready() can no longer reuse the mailbox timeout override, so add a media_ready_timeout module parameter to the core to backfill. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291688340.1426646.4755627801983775011.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 48 +++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 3 +-- drivers/cxl/mem.c | 2 +- drivers/cxl/pci.c | 45 +--------------------------------------- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/mem.c | 7 ------- tools/testing/cxl/test/mock.c | 15 ++++++++++++++ 7 files changed, 67 insertions(+), 54 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c9a494d6976a..603945f49174 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include #include +#include #include #include +#include #include #include "core.h" @@ -13,6 +16,10 @@ * a set of helpers for CXL interactions which occur via PCIe. */ +static unsigned short media_ready_timeout = 60; +module_param(media_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; @@ -94,3 +101,44 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port) return ctx.count; } EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); + +/* + * Wait up to @media_ready_timeout for the device to report memory + * active. + */ +int cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + for (i = media_ready_timeout; i; i--) { + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + media_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 7235d2f976e5..843916c1dab6 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -192,7 +192,6 @@ struct cxl_endpoint_dvsec_info { * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands - * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -227,7 +226,6 @@ struct cxl_dev_state { u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); - int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { @@ -348,6 +346,7 @@ struct cxl_mem_command { int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, size_t in_size, void *out, size_t out_size); int cxl_dev_state_identify(struct cxl_dev_state *cxlds); +int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_dev_state *cxlds); int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 80e75a410499..8c3a1c85a7ae 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -165,7 +165,7 @@ unlock: if (rc) return rc; - rc = cxlds->wait_media_ready(cxlds); + rc = cxl_await_media_ready(cxlds); if (rc) { dev_err(dev, "Media not active (%d)\n", rc); return rc; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 91b266911e52..1bf880fa1fb8 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -48,8 +48,7 @@ */ static unsigned short mbox_ready_timeout = 60; module_param(mbox_ready_timeout, ushort, 0644); -MODULE_PARM_DESC(mbox_ready_timeout, - "seconds to wait for mailbox ready / memory active status"); +MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { @@ -419,46 +418,6 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) return -ETIMEDOUT; } -/* - * Wait up to @mbox_ready_timeout for the device to report memory - * active. - */ -static int cxl_await_media_ready(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec; - bool active = false; - u64 md_status; - int rc, i; - - for (i = mbox_ready_timeout; i; i--) { - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); - if (rc) - return rc; - - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); - if (active) - break; - msleep(1000); - } - - if (!active) { - dev_err(&pdev->dev, - "timeout awaiting memory active after %d seconds\n", - mbox_ready_timeout); - return -ETIMEDOUT; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!CXLMDEV_READY(md_status)) - return -EIO; - - return 0; -} - /* * Return positive number of non-zero ranges on success and a negative * error code on failure. The cxl_mem driver depends on ranges == 0 to @@ -589,8 +548,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); - cxlds->wait_media_ready = cxl_await_media_ready; - rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 82e49ab0937d..6007fe770122 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -8,6 +8,7 @@ ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders +ldflags-y += --wrap=cxl_await_media_ready DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index b6b726eff3e2..c519ace17b41 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -237,12 +237,6 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } -static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) -{ - msleep(100); - return 0; -} - static void label_area_release(void *lsa) { vfree(lsa); @@ -278,7 +272,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; - cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6e8c9d63c92d..2c01d81ab014 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -193,6 +193,21 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); +int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(cxlds->dev)) + rc = 0; + else + rc = cxl_await_media_ready(cxlds); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); -- cgit v1.2.3 From 14d78874077442d1d0f08129f5a0ea5070984b4b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:48 -0700 Subject: cxl/mem: Consolidate CXL DVSEC Range enumeration in the core In preparation for fixing the setting of the 'mem_enabled' bit in CXL DVSEC Control register, move all CXL DVSEC range enumeration into the same source file. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291688886.1426646.15046138604010482084.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 129 ++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 1 - drivers/cxl/cxlpci.h | 4 ++ drivers/cxl/mem.c | 14 +++-- drivers/cxl/pci.c | 135 ------------------------------------------ tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/mem.c | 10 ---- tools/testing/cxl/test/mock.c | 16 +++++ 8 files changed, 158 insertions(+), 152 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 603945f49174..ea6711721901 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -142,3 +142,132 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) return 0; } EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); + +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +/* + * Return positive number of non-zero ranges on success and a negative + * error code on failure. The cxl_mem driver depends on ranges == 0 to + * init HDM operation. + */ +int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int hdm_count, rc, i, ranges = 0; + struct device *dev = &pdev->dev; + int d = cxlds->cxl_dvsec; + u16 cap, ctrl; + + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); + return -ENXIO; + } + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); + return -ENXIO; + } + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); + return rc; + } + + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info->dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + ranges++; + } + + info->ranges = ranges; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_dvsec_ranges, CXL); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 843916c1dab6..60d10ee1e7fc 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -222,7 +222,6 @@ struct cxl_dev_state { u64 next_persistent_bytes; resource_size_t component_reg_phys; - struct cxl_endpoint_dvsec_info info; u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 329e7ea3f36a..ad1b62843195 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -72,4 +72,8 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, } int devm_cxl_port_enumerate_dports(struct cxl_port *port); +struct cxl_dev_state; +struct cxl_endpoint_dvsec_info; +int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 8c3a1c85a7ae..0cfbde134fc7 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -58,18 +58,15 @@ static int create_endpoint(struct cxl_memdev *cxlmd, * decoders, or if it can not be determined if DVSEC Ranges are in use. * Otherwise, returns true. */ -__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) +__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct cxl_register_map map; struct cxl_component_reg_map *cmap = &map.component_map; bool global_enable, retval = false; void __iomem *crb; u32 global_ctrl; - if (info->ranges < 0) - return false; - /* map hdm decoder */ crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { @@ -125,6 +122,7 @@ static void enable_suspend(void *data) static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -165,6 +163,10 @@ unlock: if (rc) return rc; + rc = cxl_dvsec_ranges(cxlds, &info); + if (rc) + return rc; + rc = cxl_await_media_ready(cxlds); if (rc) { dev_err(dev, "Media not active (%d)\n", rc); @@ -175,7 +177,7 @@ unlock: * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ - if (!cxl_hdm_decode_init(cxlds)) { + if (!cxl_hdm_decode_init(cxlds, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1bf880fa1fb8..5a0ae46d4989 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -386,139 +386,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } -static int wait_for_valid(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec, rc; - u32 val; - - /* - * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high - * and Size Low registers are valid. Must be set within 1 second of - * deassertion of reset to CXL device. Likely it is already set by the - * time this runs, but otherwise give a 1.5 second timeout in case of - * clock skew. - */ - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - msleep(1500); - - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - return -ETIMEDOUT; -} - -/* - * Return positive number of non-zero ranges on success and a negative - * error code on failure. The cxl_mem driver depends on ranges == 0 to - * init HDM operation. - */ -static int __cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int hdm_count, rc, i, ranges = 0; - struct device *dev = &pdev->dev; - int d = cxlds->cxl_dvsec; - u16 cap, ctrl; - - if (!d) { - dev_dbg(dev, "No DVSEC Capability\n"); - return -ENXIO; - } - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { - dev_dbg(dev, "Not MEM Capable\n"); - return -ENXIO; - } - - /* - * It is not allowed by spec for MEM.capable to be set and have 0 legacy - * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this - * driver is for a spec defined class code which must be CXL.mem - * capable, there is no point in continuing to enable CXL.mem. - */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); - if (!hdm_count || hdm_count > 2) - return -EINVAL; - - rc = wait_for_valid(cxlds); - if (rc) { - dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); - return rc; - } - - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - - for (i = 0; i < hdm_count; i++) { - u64 base, size; - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); - if (rc) - return rc; - - size = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); - if (rc) - return rc; - - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); - if (rc) - return rc; - - base = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); - if (rc) - return rc; - - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - - info->dvsec_range[i] = (struct range) { - .start = base, - .end = base + size - 1 - }; - - if (size) - ranges++; - } - - return ranges; -} - -static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - - info->ranges = __cxl_dvsec_ranges(cxlds, info); -} - static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -583,8 +450,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - cxl_dvsec_ranges(cxlds); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 6007fe770122..2ea6fcb8baa5 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -9,6 +9,7 @@ ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready +ldflags-y += --wrap=cxl_dvsec_ranges DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index c519ace17b41..6b9239b2afd4 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -242,14 +242,6 @@ static void label_area_release(void *lsa) vfree(lsa); } -static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info; - - info = &cxlds->info; - info->mem_enabled = true; -} - static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -286,8 +278,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mock_validate_dvsec_ranges(cxlds); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 2c01d81ab014..d6aa644822db 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,6 +208,22 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); +int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) +{ + int rc = 0, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(cxlds->dev)) + info->mem_enabled = 1; + else + rc = cxl_dvsec_ranges(cxlds, info); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_ranges, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); -- cgit v1.2.3 From dd2d42ad6f422076d1bd49b132bec74376c26f5c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:54 -0700 Subject: cxl/mem: Skip range enumeration if mem_enable clear When a device does not have mem_enable set then the current range settings are moot. Skip the enumeration and cause cxl_hdm_decode_init() to proceed directly to enable the HDM Decoder Capability. Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291689442.1426646.18012291761753694336.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 2 ++ drivers/cxl/mem.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index ea6711721901..f3e59f8b6621 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -224,6 +224,8 @@ int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, } info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info->mem_enabled) + return 0; for (i = 0; i < hdm_count; i++) { u64 base, size; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 0cfbde134fc7..902d1f6e189e 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -92,7 +92,7 @@ __mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds, * are expected even though Linux does not require or maintain that * match. */ - if (!global_enable && info->ranges) + if (!global_enable && info->mem_enabled && info->ranges) goto out; retval = true; -- cgit v1.2.3 From a12562bb70776093b270f79a4b6ef18f4bcead2b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:00 -0700 Subject: cxl/mem: Merge cxl_dvsec_ranges() and cxl_hdm_decode_init() In preparation for changing how the driver handles 'mem_enable' in the CXL DVSEC control register. Merge the contents of cxl_hdm_decode_init() into cxl_dvsec_ranges() and rename the combined function cxl_hdm_decode_init(). The possible cleanups and fixes that result from this merge are saved for a follow-on change. Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165291690027.1426646.10249756632415633752.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 82 +++++++++++++++++++++++++++++++++++++++---- drivers/cxl/cxlpci.h | 4 +-- drivers/cxl/mem.c | 80 +---------------------------------------- tools/testing/cxl/Kbuild | 3 +- tools/testing/cxl/mock_mem.c | 10 ------ tools/testing/cxl/test/mock.c | 8 ++--- 6 files changed, 83 insertions(+), 104 deletions(-) delete mode 100644 tools/testing/cxl/mock_mem.c diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index f3e59f8b6621..0fbda1a1ca1b 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -175,13 +175,71 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) return -ETIMEDOUT; } -/* - * Return positive number of non-zero ranges on success and a negative - * error code on failure. The cxl_mem driver depends on ranges == 0 to - * init HDM operation. +static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) +{ + struct cxl_register_map map; + struct cxl_component_reg_map *cmap = &map.component_map; + bool global_enable, retval = false; + void __iomem *crb; + u32 global_ctrl; + + /* map hdm decoder */ + crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_dbg(cxlds->dev, "Failed to map component registers\n"); + return false; + } + + cxl_probe_component_regs(cxlds->dev, crb, cmap); + if (!cmap->hdm_decoder.valid) { + dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); + goto out; + } + + global_ctrl = readl(crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. + */ + if (!global_enable && info->mem_enabled && info->ranges) + goto out; + + retval = true; + + /* + * Permanently (for this boot at least) opt the device into HDM + * operation. Individual HDM decoders still need to be enabled after + * this point. + */ + if (!global_enable) { + dev_dbg(cxlds->dev, "Enabling HDM decode\n"); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + } + +out: + iounmap(crb); + return retval; +} + +/** + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * @info: DVSEC Range cached enumeration + * + * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); int hdm_count, rc, i, ranges = 0; @@ -270,6 +328,16 @@ int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, info->ranges = ranges; + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ + if (!__cxl_hdm_decode_init(cxlds, info)) { + dev_err(dev, + "Legacy range registers configuration prevents HDM operation.\n"); + return -EBUSY; + } + return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_dvsec_ranges, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index ad1b62843195..202fdaa8d293 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -74,6 +74,6 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; struct cxl_endpoint_dvsec_info; -int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 902d1f6e189e..2a5dc92d566f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -46,74 +46,6 @@ static int create_endpoint(struct cxl_memdev *cxlmd, return cxl_endpoint_autoremove(cxlmd, endpoint); } -/** - * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint - * @cxlds: Device state - * - * Additionally, enables global HDM decoding. Warning: don't call this outside - * of probe. Once probe is complete, the port driver owns all access to the HDM - * decoder registers. - * - * Returns: false if DVSEC Ranges are being used instead of HDM - * decoders, or if it can not be determined if DVSEC Ranges are in use. - * Otherwise, returns true. - */ -__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - struct cxl_register_map map; - struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, retval = false; - void __iomem *crb; - u32 global_ctrl; - - /* map hdm decoder */ - crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - dev_dbg(cxlds->dev, "Failed to map component registers\n"); - return false; - } - - cxl_probe_component_regs(cxlds->dev, crb, cmap); - if (!cmap->hdm_decoder.valid) { - dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); - goto out; - } - - global_ctrl = readl(crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; - - /* - * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base - * [High,Low] when HDM operation is enabled the range register values - * are ignored by the device, but the spec also recommends matching the - * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges - * are expected even though Linux does not require or maintain that - * match. - */ - if (!global_enable && info->mem_enabled && info->ranges) - goto out; - - retval = true; - - /* - * Permanently (for this boot at least) opt the device into HDM - * operation. Individual HDM decoders still need to be enabled after - * this point. - */ - if (!global_enable) { - dev_dbg(cxlds->dev, "Enabling HDM decode\n"); - writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - } - -out: - iounmap(crb); - return retval; -} - static void enable_suspend(void *data) { cxl_mem_active_dec(); @@ -163,7 +95,7 @@ unlock: if (rc) return rc; - rc = cxl_dvsec_ranges(cxlds, &info); + rc = cxl_hdm_decode_init(cxlds, &info); if (rc) return rc; @@ -173,16 +105,6 @@ unlock: return rc; } - /* - * If DVSEC ranges are being used instead of HDM decoder registers there - * is no use in trying to manage those. - */ - if (!cxl_hdm_decode_init(cxlds, &info)) { - dev_err(dev, - "Legacy range registers configuration prevents HDM operation.\n"); - return -EBUSY; - } - /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 2ea6fcb8baa5..33543231d453 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -9,7 +9,7 @@ ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready -ldflags-y += --wrap=cxl_dvsec_ranges +ldflags-y += --wrap=cxl_hdm_decode_init DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -36,7 +36,6 @@ cxl_port-y += config_check.o obj-m += cxl_mem.o cxl_mem-y := $(CXL_SRC)/mem.o -cxl_mem-y += mock_mem.o cxl_mem-y += config_check.o obj-m += cxl_core.o diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c deleted file mode 100644 index 69946f678cfa..000000000000 --- a/tools/testing/cxl/mock_mem.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ - -#include - -struct cxl_dev_state; -bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) -{ - return true; -} diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index d6aa644822db..ddf0e7dd9249 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,8 +208,8 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); -int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); @@ -217,12 +217,12 @@ int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds, if (ops && ops->is_mock_dev(cxlds->dev)) info->mem_enabled = 1; else - rc = cxl_dvsec_ranges(cxlds, info); + rc = cxl_hdm_decode_init(cxlds, info); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_ranges, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); -- cgit v1.2.3 From 92804edb11f065aadb3a4f398bed8a846a035cd3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:06 -0700 Subject: cxl/pci: Drop @info argument to cxl_hdm_decode_init() Now that nothing external to cxl_hdm_decode_init() considers 'struct cxl_endpoint_dvec_info' move it internal to cxl_hdm_decode_init(). Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291690612.1426646.7866084245521113414.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 15 +++++++-------- drivers/cxl/cxlpci.h | 4 +--- drivers/cxl/mem.c | 3 +-- tools/testing/cxl/test/mock.c | 9 +++------ 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 0fbda1a1ca1b..7d2238edc379 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -234,14 +234,13 @@ out: /** * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state - * @info: DVSEC Range cached enumeration * * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct cxl_endpoint_dvsec_info info = { 0 }; int hdm_count, rc, i, ranges = 0; struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; @@ -281,8 +280,8 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, return rc; } - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - if (!info->mem_enabled) + info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info.mem_enabled) return 0; for (i = 0; i < hdm_count; i++) { @@ -317,7 +316,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - info->dvsec_range[i] = (struct range) { + info.dvsec_range[i] = (struct range) { .start = base, .end = base + size - 1 }; @@ -326,13 +325,13 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, ranges++; } - info->ranges = ranges; + info.ranges = ranges; /* * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ - if (!__cxl_hdm_decode_init(cxlds, info)) { + if (!__cxl_hdm_decode_init(cxlds, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 202fdaa8d293..53cd34f8813c 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -73,7 +73,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -struct cxl_endpoint_dvsec_info; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 2a5dc92d566f..8ce89d128e36 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -54,7 +54,6 @@ static void enable_suspend(void *data) static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -95,7 +94,7 @@ unlock: if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, &info); + rc = cxl_hdm_decode_init(cxlds); if (rc) return rc; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index ddf0e7dd9249..45ffbb8f519a 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,16 +208,13 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); -int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_dev(cxlds->dev)) - info->mem_enabled = 1; - else - rc = cxl_hdm_decode_init(cxlds, info); + if (!ops || !ops->is_mock_dev(cxlds->dev)) + rc = cxl_hdm_decode_init(cxlds); put_cxl_mock_ops(index); return rc; -- cgit v1.2.3 From 5e5f4ad52f33c125af9b91d4c3b7cad59c13772e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:11 -0700 Subject: cxl/port: Move endpoint HDM Decoder Capability init to port driver The responsibility for establishing HDM Decoder Capability based operation is more closely tied to port enabling than memdev enabling which is concerned with port enumeration. This later enables reusing @cxlhdm for probing / controlling "global enable" for the HDM Decoder Capability. For now, just do the nominal move. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291691167.1426646.7936109077255288258.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 11 ----------- drivers/cxl/port.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 8ce89d128e36..c310f1fd3db0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -54,7 +54,6 @@ static void enable_suspend(void *data) static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -94,16 +93,6 @@ unlock: if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds); - if (rc) - return rc; - - rc = cxl_await_media_ready(cxlds); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d420da5fc39c..a7deaeaf0276 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -38,11 +38,22 @@ static int cxl_port_probe(struct device *dev) if (is_cxl_endpoint(port)) { struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + struct cxl_dev_state *cxlds = cxlmd->cxlds; get_device(&cxlmd->dev); rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); if (rc) return rc; + + rc = cxl_hdm_decode_init(cxlds); + if (rc) + return rc; + + rc = cxl_await_media_ready(cxlds); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } } else { rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) -- cgit v1.2.3 From fcfbc93cc33ec601f00f113eca6fc484b930532d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:17 -0700 Subject: cxl/port: Reuse 'struct cxl_hdm' context for hdm init The port driver maps component registers for port operations. Reuse that mapping for HDM Decoder Capability setup / enable. Move devm_cxl_setup_hdm() before cxl_hdm_decode_init() and plumb @cxlhdm through the hdm init helpers. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291691712.1426646.14336397551571515480.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 39 ++++++++++----------------------------- drivers/cxl/cxlpci.h | 2 +- drivers/cxl/port.c | 25 ++++++++++++++----------- tools/testing/cxl/test/mock.c | 5 +++-- 4 files changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 7d2238edc379..3305e9b750af 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -176,29 +176,14 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) } static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) { - struct cxl_register_map map; - struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, retval = false; - void __iomem *crb; + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + bool global_enable; u32 global_ctrl; - /* map hdm decoder */ - crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - dev_dbg(cxlds->dev, "Failed to map component registers\n"); - return false; - } - - cxl_probe_component_regs(cxlds->dev, crb, cmap); - if (!cmap->hdm_decoder.valid) { - dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); - goto out; - } - - global_ctrl = readl(crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; /* @@ -210,9 +195,7 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, * match. */ if (!global_enable && info->mem_enabled && info->ranges) - goto out; - - retval = true; + return false; /* * Permanently (for this boot at least) opt the device into HDM @@ -222,22 +205,20 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, if (!global_enable) { dev_dbg(cxlds->dev, "Enabling HDM decode\n"); writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); + hdm + CXL_HDM_DECODER_CTRL_OFFSET); } -out: - iounmap(crb); - return retval; + return true; } /** * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state + * @cxlhdm: Mapped HDM decoder Capability * * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); struct cxl_endpoint_dvsec_info info = { 0 }; @@ -331,7 +312,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ - if (!__cxl_hdm_decode_init(cxlds, &info)) { + if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 53cd34f8813c..fce1c11729c2 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -73,5 +73,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index a7deaeaf0276..3cf308f114c4 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -36,6 +36,19 @@ static int cxl_port_probe(struct device *dev) struct cxl_hdm *cxlhdm; int rc; + + if (!is_cxl_endpoint(port)) { + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) + return rc; + if (rc == 1) + return devm_cxl_add_passthrough_decoder(port); + } + + cxlhdm = devm_cxl_setup_hdm(port); + if (IS_ERR(cxlhdm)) + return PTR_ERR(cxlhdm); + if (is_cxl_endpoint(port)) { struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -45,7 +58,7 @@ static int cxl_port_probe(struct device *dev) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds); + rc = cxl_hdm_decode_init(cxlds, cxlhdm); if (rc) return rc; @@ -54,18 +67,8 @@ static int cxl_port_probe(struct device *dev) dev_err(dev, "Media not active (%d)\n", rc); return rc; } - } else { - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - if (rc == 1) - return devm_cxl_add_passthrough_decoder(port); } - cxlhdm = devm_cxl_setup_hdm(port); - if (IS_ERR(cxlhdm)) - return PTR_ERR(cxlhdm); - rc = devm_cxl_enumerate_decoders(cxlhdm); if (rc) { dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 45ffbb8f519a..f1f8c40948c5 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,13 +208,14 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); -bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds) +bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (!ops || !ops->is_mock_dev(cxlds->dev)) - rc = cxl_hdm_decode_init(cxlds); + rc = cxl_hdm_decode_init(cxlds, cxlhdm); put_cxl_mock_ops(index); return rc; -- cgit v1.2.3 From 34e37b4c432cd0f1842b352fde4b8878b4166888 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 20 May 2022 11:30:15 -0700 Subject: cxl/port: Enable HDM Capability after validating DVSEC Ranges CXL memory expanders that support the CXL 2.0 memory device class code include an "HDM Decoder Capability" mechanism to supplant the "CXL DVSEC Range" mechanism originally defined in CXL 1.1. Both mechanisms depend on a "mem_enable" bit being set in configuration space before either mechanism activates. When the HDM Decoder Capability is enabled the CXL DVSEC Range settings are ignored. Previously, the cxl_mem driver was relying on platform-firmware to set "mem_enable". That is an invalid assumption as there is no requirement that platform-firmware sets the bit before the driver sees a device, especially in hot-plug scenarios. Additionally, ACPI-platforms that support CXL 2.0 devices also support the ACPI CEDT (CXL Early Discovery Table). That table outlines the platform permissible address ranges for CXL operation. So, there is a need for the driver to set "mem_enable", and there is information available to determine the validity of the CXL DVSEC Ranges. Arrange for the driver to optionally enable the HDM Decoder Capability if "mem_enable" was not set by platform firmware, or the CXL DVSEC Range configuration was invalid. Be careful to only disable memory decode if the kernel was the one to enable it. In other words, if CXL is backing all of kernel memory at boot the device needs to maintain "mem_enable" and "HDM Decoder enable" all the way up to handoff back to platform firmware (e.g. ACPI S5 state entry may require CXL memory to stay active). Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Cc: Dan Carpenter [dan: fix early terminiation of range-allowed loop] Cc: Ariel Sibley [ariel: Memory_size must be non-zero] Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165307136375.2499769.861793697156744166.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 167 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 152 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 3305e9b750af..c4c99ff7b55e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -175,16 +175,149 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) return -ETIMEDOUT; } +static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + u16 ctrl; + int rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc < 0) + return rc; + + if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + return 1; + ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl |= val; + + rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + if (rc < 0) + return rc; + + return 0; +} + +static void clear_mem_enable(void *cxlds) +{ + cxl_set_mem_enable(cxlds, 0); +} + +static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) +{ + int rc; + + rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + return devm_add_action_or_reset(host, clear_mem_enable, cxlds); +} + +static bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + +/* require dvsec ranges to be covered by a locked platform window */ +static int dvsec_range_allowed(struct device *dev, void *arg) +{ + struct range *dev_range = arg; + struct cxl_decoder *cxld; + struct range root_range; + + if (!is_root_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + + if (!(cxld->flags & CXL_DECODER_F_LOCK)) + return 0; + if (!(cxld->flags & CXL_DECODER_F_RAM)) + return 0; + + root_range = (struct range) { + .start = cxld->platform_res.start, + .end = cxld->platform_res.end, + }; + + return range_contains(&root_range, dev_range); +} + +static void disable_hdm(void *_cxlhdm) +{ + u32 global_ctrl; + struct cxl_hdm *cxlhdm = _cxlhdm; + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); +} + +static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + return devm_add_action_or_reset(host, disable_hdm, cxlhdm); +} + static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; - bool global_enable; + struct cxl_port *port = cxlhdm->port; + struct device *dev = cxlds->dev; + struct cxl_port *root; + int i, rc, allowed; u32 global_ctrl; global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); - global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + + /* + * If the HDM Decoder Capability is already enabled then assume + * that some other agent like platform firmware set it up. + */ + if (global_ctrl & CXL_HDM_DECODER_ENABLE) { + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; + return true; + } + + root = to_cxl_port(port->dev.parent); + while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) + root = to_cxl_port(root->dev.parent); + if (!is_cxl_root(root)) { + dev_err(dev, "Failed to acquire root port for HDM enable\n"); + return false; + } + + for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { + struct device *cxld_dev; + + cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], + dvsec_range_allowed); + if (!cxld_dev) { + dev_dbg(dev, "DVSEC Range%d denied by platform\n", i); + continue; + } + dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i); + put_device(cxld_dev); + allowed++; + } + + if (!allowed) { + cxl_set_mem_enable(cxlds, 0); + info->mem_enabled = 0; + } /* * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base @@ -192,21 +325,19 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, * are ignored by the device, but the spec also recommends matching the * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges * are expected even though Linux does not require or maintain that - * match. + * match. If at least one DVSEC range is enabled and allowed, skip HDM + * Decoder Capability Enable. */ - if (!global_enable && info->mem_enabled && info->ranges) + if (info->mem_enabled) return false; - /* - * Permanently (for this boot at least) opt the device into HDM - * operation. Individual HDM decoders still need to be enabled after - * this point. - */ - if (!global_enable) { - dev_dbg(cxlds->dev, "Enabling HDM decode\n"); - writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - hdm + CXL_HDM_DECODER_CTRL_OFFSET); - } + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); + if (rc) + return false; + + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; return true; } @@ -261,9 +392,14 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) return rc; } + /* + * The current DVSEC values are moot if the memory capability is + * disabled, and they will remain moot after the HDM Decoder + * capability is enabled. + */ info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); if (!info.mem_enabled) - return 0; + goto hdm_init; for (i = 0; i < hdm_count; i++) { u64 base, size; @@ -312,6 +448,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ +hdm_init: if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); -- cgit v1.2.3