summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_nl.c
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2015-06-08 15:18:45 +0200
committer: Jens Axboe <axboe@fb.com> 2015-11-25 09:22:03 -0700
commit: 5f7c01249bea67c32a1a1551a8f2fe0b8b801ab4 (patch)
tree: 369efade0df17d0170e1dcade7b9043e559f372a /drivers/block/drbd/drbd_nl.c
parent: 603ee2c8c78b2fb5a9dc14fb8b2bb2650ebcab1f (diff)
download: linux-5f7c01249bea67c32a1a1551a8f2fe0b8b801ab4.tar.bz2
drbd: avoid potential deadlock during handshake
During handshake communication, we also reconsider our device size, using drbd_determine_dev_size(). Just in case we need to change the offsets or layout of our on-disk metadata, we lock out application and other meta data IO, and wait for the activity log to be "idle" (no more referenced extents). If this handshake happens just after a connection loss, with a fencing policy of "resource-and-stonith", we have frozen IO. If, additionally, the activity log was "starving" (too many incoming random writes at that point in time), it won't become idle, ever, because of the frozen IO, and this would be a lockup of the receiver thread, and consequently of DRBD. Previous logic (re-)initialized with a special "empty" transaction block, which required the activity log to fully drain first. Instead, write out some standard activity log transactions. Using lc_try_lock_for_transaction() instead of lc_try_lock() does not care about pending activity log references, avoiding the potential deadlock. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r-- drivers/block/drbd/drbd_nl.c 33
1 files changed, 19 insertions, 14 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c7cd3df8107e..f4ca27359541 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -903,15 +903,14 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
int md_moved, la_size_changed;
enum determine_dev_size rv = DS_UNCHANGED;
- /* race:
- * application request passes inc_ap_bio,
- * but then cannot get an AL-reference.
- * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+ /* We may change the on-disk offsets of our meta data below. Lock out
+ * anything that may cause meta data IO, to avoid acting on incomplete
+ * layout changes or scribbling over meta data that is in the process
+ * of being moved.
*
- * to avoid that:
- * Suspend IO right here.
- * still lock the act_log to not trigger ASSERTs there.
- */
+ * Move is not exactly correct, btw, currently we have all our meta
+ * data in core memory, to "move" it we just write it all out, there
+ * are no reads. */
drbd_suspend_io(device);
buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
if (!buffer) {
@@ -919,9 +918,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
return DS_ERROR;
}
- /* no wait necessary anymore, actually we could assert that */
- wait_event(device->al_wait, lc_try_lock(device->act_log));
-
prev_first_sect = drbd_md_first_sector(device->ldev);
prev_size = device->ldev->md.md_size_sect;
la_size_sect = device->ldev->md.la_size_sect;
@@ -997,20 +993,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
* Clear the timer, to avoid scary "timer expired!" messages,
* "Superblock" is written out at least twice below, anyways. */
del_timer(&device->md_sync_timer);
- drbd_al_shrink(device); /* All extents inactive. */
+ /* We won't change the "al-extents" setting, we just may need
+ * to move the on-disk location of the activity log ringbuffer.
+ * Lock for transaction is good enough, it may well be "dirty"
+ * or even "starving". */
+ wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
+
+ /* mark current on-disk bitmap and activity log as unreliable */
prev_flags = md->flags;
- md->flags &= ~MDF_PRIMARY_IND;
+ md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
drbd_md_write(device, buffer);
+ drbd_al_initialize(device, buffer);
+
drbd_info(device, "Writing the whole bitmap, %s\n",
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
"size changed", BM_LOCKED_MASK);
- drbd_initialize_al(device, buffer);
+ /* on-disk bitmap and activity log is authoritative again
+ * (unless there was an IO error meanwhile...) */
md->flags = prev_flags;
drbd_md_write(device, buffer);