diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-01-02 18:49:58 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-01-02 18:49:58 -0800 |
commit | 77d0b194b2df04a1992f882d96ff4e2bd8bb8fe0 (patch) | |
tree | 2bc1800faf8bf6712e693b5a8ea9c1fdecbe1dc7 /drivers/block/drbd/drbd_nl.c | |
parent | b79f9f93eb483f2757b089bb4e1eb3827a609080 (diff) | |
parent | 427c5ce4417cba0801fbf79c8525d1330704759c (diff) | |
download | linux-77d0b194b2df04a1992f882d96ff4e2bd8bb8fe0.tar.bz2 |
Merge tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:
- Dead code removal for loop/sunvdc (Chengguang)
- Mark BIDI support for bsg as deprecated, logging a single dmesg
warning if anyone is actually using it (Christoph)
- blkcg cleanup, killing a dead function and making the tryget_closest
variant easier to read (Dennis)
- Floppy fixes, one fixing a regression in swim3 (Finn)
- lightnvm use-after-free fix (Gustavo)
- gdrom leak fix (Wenwen)
- a set of drbd updates (Lars, Luc, Nathan, Roland)
* tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block: (28 commits)
block/swim3: Fix regression on PowerBook G3
block/swim3: Fix -EBUSY error when re-opening device after unmount
block/swim3: Remove dead return statement
block/amiflop: Don't log error message on invalid ioctl
gdrom: fix a memory leak bug
lightnvm: pblk: fix use-after-free bug
block: sunvdc: remove redundant code
block: loop: remove redundant code
bsg: deprecate BIDI support in bsg
blkcg: remove unused __blkg_release_rcu()
blkcg: clean up blkg_tryget_closest()
drbd: Change drbd_request_detach_interruptible's return type to int
drbd: Avoid Clang warning about pointless switch statment
drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
drbd: skip spurious timeout (ping-timeo) when failing promote
drbd: don't retry connection if peers do not agree on "authentication" settings
drbd: fix print_st_err()'s prototype to match the definition
drbd: avoid spurious self-outdating with concurrent disconnect / down
drbd: do not block when adjusting "disk-options" while IO is frozen
drbd: fix comment typos
...
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 133 |
1 files changed, 107 insertions, 26 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d15703b1ffe8..f2471172a961 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -127,6 +127,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info) return 0; } +__printf(2, 3) +static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...) +{ + va_list args; + struct nlattr *nla, *txt; + int err = -EMSGSIZE; + int len; + + nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + if (!nla) + return err; + + txt = nla_reserve(skb, T_info_text, 256); + if (!txt) { + nla_nest_cancel(skb, nla); + return err; + } + va_start(args, fmt); + len = vscnprintf(nla_data(txt), 256, fmt, args); + va_end(args); + + /* maybe: retry with larger reserve, if truncated */ + txt->nla_len = nla_attr_size(len+1); + nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len)); + nla_nest_end(skb, nla); + + return 0; +} + /* This would be a good candidate for a "pre_doit" hook, * and per-family private info->pointers. * But we need to stay compatible with older kernels. @@ -668,14 +697,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - int timeo; - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; - rcu_read_unlock(); - schedule_timeout_interruptible(timeo); - if (try < max_tries) + if (try < max_tries) { + int timeo; try = max_tries - 1; + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } continue; } if (rv < SS_SUCCESS) { @@ -921,7 +951,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct } prev; sector_t u_size, size; struct drbd_md *md = &device->ldev->md; - char ppb[10]; void *buffer; int md_moved, la_size_changed; @@ -999,8 +1028,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct /* racy, see comments above. */ drbd_set_my_capacity(device, size); md->la_size_sect = size; - drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), - (unsigned long long)size>>1); } if (rv <= DS_ERROR) goto err_out; @@ -1234,6 +1261,21 @@ static void fixup_discard_if_not_supported(struct request_queue *q) } } +static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) +{ + /* Fixup max_write_zeroes_sectors after blk_queue_stack_limits(): + * if we can handle "zeroes" efficiently on the protocol, + * we want to do that, even if our backend does not announce + * max_write_zeroes_sectors itself. */ + struct drbd_connection *connection = first_peer_device(device)->connection; + /* If the peer announces WZEROES support, use it. Otherwise, rather + * send explicit zeroes than rely on some discard-zeroes-data magic. */ + if (connection->agreed_features & DRBD_FF_WZEROES) + q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; + else + q->limits.max_write_zeroes_sectors = 0; +} + static void decide_on_write_same_support(struct drbd_device *device, struct request_queue *q, struct request_queue *b, struct o_qlim *o, @@ -1344,6 +1386,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi } } fixup_discard_if_not_supported(q); + fixup_write_zeroes(device, q); } void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) @@ -1514,6 +1557,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis } } +static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc) +{ + int err = -EBUSY; + + if (device->act_log && + device->act_log->nr_elements == dc->al_extents) + return 0; + + drbd_suspend_io(device); + /* If IO completion is currently blocked, we would likely wait + * "forever" for the activity log to become unused. So we don't. */ + if (atomic_read(&device->ap_bio_cnt)) + goto out; + + wait_event(device->al_wait, lc_try_lock(device->act_log)); + drbd_al_shrink(device); + err = drbd_check_al_size(device, dc); + lc_unlock(device->act_log); + wake_up(&device->al_wait); +out: + drbd_resume_io(device); + return err; +} + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -1576,15 +1643,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } - drbd_suspend_io(device); - wait_event(device->al_wait, lc_try_lock(device->act_log)); - drbd_al_shrink(device); - err = drbd_check_al_size(device, new_disk_conf); - lc_unlock(device->act_log); - wake_up(&device->al_wait); - drbd_resume_io(device); - + err = disk_opts_check_al_size(device, new_disk_conf); if (err) { + /* Could be just "busy". Ignore? + * Introduce dedicated error code? */ + drbd_msg_put_info(adm_ctx.reply_skb, + "Try again without changing current al-extents setting"); retcode = ERR_NOMEM; goto fail_unlock; } @@ -1934,9 +1998,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (device->state.conn < C_CONNECTED && - device->state.role == R_PRIMARY && device->ed_uuid && - (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { + if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid && + (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) && + (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { drbd_err(device, "Can only attach to data with current UUID=%016llX\n", (unsigned long long)device->ed_uuid); retcode = ERR_DATA_NOT_CURRENT; @@ -1950,11 +2014,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Prevent shrinking of consistent devices ! */ - if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { - drbd_warn(device, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TOO_SMALL; - goto force_diskless_dec; + { + unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0); + unsigned long long eff = nbc->md.la_size_sect; + if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) { + if (nsz == nbc->disk_conf->disk_size) { + drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff); + } else { + drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff); + drbd_msg_sprintf_info(adm_ctx.reply_skb, + "To-be-attached device has last effective > current size, and is consistent\n" + "(%llu > %llu sectors). Refusing to attach.", eff, nsz); + retcode = ERR_IMPLICIT_SHRINK; + goto force_diskless_dec; + } + } } lock_all_resources(); @@ -2654,8 +2728,10 @@ out: static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) { + enum drbd_conns cstate; enum drbd_state_rv rv; +repeat: rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), force ? CS_HARD : 0); @@ -2673,6 +2749,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection break; case SS_CW_FAILED_BY_PEER: + spin_lock_irq(&connection->resource->req_lock); + cstate = connection->cstate; + spin_unlock_irq(&connection->resource->req_lock); + if (cstate <= C_WF_CONNECTION) + goto repeat; /* The peer probably wants to see us outdated. */ rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, disk, D_OUTDATED), 0); |