From 6c1ea260f89709e0021d2c59f8fd2a104b5b1123 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 11 Apr 2016 19:34:49 +0200 Subject: libceph: make authorizer destruction independent of ceph_auth_client Starting the kernel client with cephx disabled and then enabling cephx and restarting userspace daemons can result in a crash: [262671.478162] BUG: unable to handle kernel paging request at ffffebe000000000 [262671.531460] IP: [] kfree+0x5a/0x130 [262671.584334] PGD 0 [262671.635847] Oops: 0000 [#1] SMP [262672.055841] CPU: 22 PID: 2961272 Comm: kworker/22:2 Not tainted 4.2.0-34-generic #39~14.04.1-Ubuntu [262672.162338] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS 2.4.3 07/09/2014 [262672.268937] Workqueue: ceph-msgr con_work [libceph] [262672.322290] task: ffff88081c2d0dc0 ti: ffff880149ae8000 task.ti: ffff880149ae8000 [262672.428330] RIP: 0010:[] [] kfree+0x5a/0x130 [262672.535880] RSP: 0018:ffff880149aeba58 EFLAGS: 00010286 [262672.589486] RAX: 000001e000000000 RBX: 0000000000000012 RCX: ffff8807e7461018 [262672.695980] RDX: 000077ff80000000 RSI: ffff88081af2be04 RDI: 0000000000000012 [262672.803668] RBP: ffff880149aeba78 R08: 0000000000000000 R09: 0000000000000000 [262672.912299] R10: ffffebe000000000 R11: ffff880819a60e78 R12: ffff8800aec8df40 [262673.021769] R13: ffffffffc035f70f R14: ffff8807e5b138e0 R15: ffff880da9785840 [262673.131722] FS: 0000000000000000(0000) GS:ffff88081fac0000(0000) knlGS:0000000000000000 [262673.245377] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [262673.303281] CR2: ffffebe000000000 CR3: 0000000001c0d000 CR4: 00000000001406e0 [262673.417556] Stack: [262673.472943] ffff880149aeba88 ffff88081af2be04 ffff8800aec8df40 ffff88081af2be04 [262673.583767] ffff880149aeba98 ffffffffc035f70f ffff880149aebac8 ffff8800aec8df00 [262673.694546] ffff880149aebac8 ffffffffc035c89e ffff8807e5b138e0 ffff8805b047f800 [262673.805230] Call Trace: [262673.859116] [] ceph_x_destroy_authorizer+0x1f/0x50 [libceph] [262673.968705] [] ceph_auth_destroy_authorizer+0x3e/0x60 [libceph] [262674.078852] [] put_osd+0x45/0x80 [libceph] [262674.134249] [] remove_osd+0xae/0x140 [libceph] [262674.189124] [] __reset_osd+0x103/0x150 [libceph] [262674.243749] [] kick_requests+0x223/0x460 [libceph] [262674.297485] [] ceph_osdc_handle_map+0x282/0x5e0 [libceph] [262674.350813] [] dispatch+0x4e/0x720 [libceph] [262674.403312] [] try_read+0x3d1/0x1090 [libceph] [262674.454712] [] ? dequeue_entity+0x152/0x690 [262674.505096] [] con_work+0xcb/0x1300 [libceph] [262674.555104] [] process_one_work+0x14e/0x3d0 [262674.604072] [] worker_thread+0x11a/0x470 [262674.652187] [] ? rescuer_thread+0x310/0x310 [262674.699022] [] kthread+0xd2/0xf0 [262674.744494] [] ? kthread_create_on_node+0x1c0/0x1c0 [262674.789543] [] ret_from_fork+0x3f/0x70 [262674.834094] [] ? kthread_create_on_node+0x1c0/0x1c0 What happens is the following: (1) new MON session is established (2) old "none" ac is destroyed (3) new "cephx" ac is constructed ... (4) old OSD session (w/ "none" authorizer) is put ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer) osd->o_auth.authorizer in the "none" case is just a bare pointer into ac, which contains a single static copy for all services. By the time we get to (4), "none" ac, freed in (2), is long gone. On top of that, a new vtable installed in (3) points us at ceph_x_destroy_authorizer(), so we end up trying to destroy a "none" authorizer with a "cephx" destructor operating on invalid memory! To fix this, decouple authorizer destruction from ac and do away with a single static "none" authorizer by making a copy for each OSD or MDS session. Authorizers themselves are independent of ac and so there is no reason for destroy_authorizer() to be an ac op. Make it an op on the authorizer itself by turning ceph_authorizer into a real struct. Fixes: http://tracker.ceph.com/issues/15447 Reported-by: Alan Zhang Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/auth_none.c | 71 +++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'net/ceph/auth_none.c') diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 8c93fa8d81bc..5f836f02ae36 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac) struct ceph_auth_none_info *xi = ac->private; xi->starting = true; - xi->built_authorizer = false; } static void destroy(struct ceph_auth_client *ac) @@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac, + struct ceph_none_authorizer *au) +{ + void *p = au->buf; + void *const end = p + sizeof(au->buf); + int ret; + + ceph_encode_8_safe(&p, end, 1, e_range); + ret = ceph_entity_name_encode(ac->name, &p, end); + if (ret < 0) + return ret; + + ceph_encode_64_safe(&p, end, ac->global_id, e_range); + au->buf_len = p - (void *)au->buf; + dout("%s built authorizer len %d\n", __func__, au->buf_len); + return 0; + +e_range: + return -ERANGE; +} + static int build_request(struct ceph_auth_client *ac, void *buf, void *end) { return 0; @@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result, return result; } +static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a) +{ + kfree(a); +} + /* - * build an 'authorizer' with our entity_name and global_id. we can - * reuse a single static copy since it is identical for all services - * we connect to. + * build an 'authorizer' with our entity_name and global_id. it is + * identical for all services we connect to. */ static int ceph_auth_none_create_authorizer( struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth) { - struct ceph_auth_none_info *ai = ac->private; - struct ceph_none_authorizer *au = &ai->au; - void *p, *end; + struct ceph_none_authorizer *au; int ret; - if (!ai->built_authorizer) { - p = au->buf; - end = p + sizeof(au->buf); - ceph_encode_8(&p, 1); - ret = ceph_entity_name_encode(ac->name, &p, end - 8); - if (ret < 0) - goto bad; - ceph_decode_need(&p, end, sizeof(u64), bad2); - ceph_encode_64(&p, ac->global_id); - au->buf_len = p - (void *)au->buf; - ai->built_authorizer = true; - dout("built authorizer len %d\n", au->buf_len); + au = kmalloc(sizeof(*au), GFP_NOFS); + if (!au) + return -ENOMEM; + + au->base.destroy = ceph_auth_none_destroy_authorizer; + + ret = ceph_auth_none_build_authorizer(ac, au); + if (ret) { + kfree(au); + return ret; } auth->authorizer = (struct ceph_authorizer *) au; @@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer( auth->authorizer_reply_buf_len = sizeof (au->reply_buf); return 0; - -bad2: - ret = -ERANGE; -bad: - return ret; -} - -static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a) -{ - /* nothing to do */ } static const struct ceph_auth_client_ops ceph_auth_none_ops = { @@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, - .destroy_authorizer = ceph_auth_none_destroy_authorizer, }; int ceph_auth_none_init(struct ceph_auth_client *ac) @@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac) return -ENOMEM; xi->starting = true; - xi->built_authorizer = false; ac->protocol = CEPH_AUTH_NONE; ac->private = xi; -- cgit v1.2.3