diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-17 16:03:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-17 16:03:12 -0800 |
commit | 57666509b70030a9483d13222bfec8eec5db07df (patch) | |
tree | 1e0021c2aabc2ce8832e8c816e2aa94b0b77a323 /fs/ceph/addr.c | |
parent | 87c31b39abcb6fb6bd7d111200c9627a594bf6a9 (diff) | |
parent | 0aeff37abada9f8c08d2b10481a43d3ae406c823 (diff) | |
download | linux-57666509b70030a9483d13222bfec8eec5db07df.tar.bz2 |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil:
"The big item here is support for inline data for CephFS and for
message signatures from Zheng. There are also several bug fixes,
including interrupted flock request handling, 0-length xattrs, mksnap,
cached readdir results, and a message version compat field. Finally
there are several cleanups from Ilya, Dan, and Markus.
Note that there is another series coming soon that fixes some bugs in
the RBD 'lingering' requests, but it isn't quite ready yet"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (27 commits)
ceph: fix setting empty extended attribute
ceph: fix mksnap crash
ceph: do_sync is never initialized
libceph: fixup includes in pagelist.h
ceph: support inline data feature
ceph: flush inline version
ceph: convert inline data to normal data before data write
ceph: sync read inline data
ceph: fetch inline data when getting Fcr cap refs
ceph: use getattr request to fetch inline data
ceph: add inline data to pagecache
ceph: parse inline data in MClientReply and MClientCaps
libceph: specify position of extent operation
libceph: add CREATE osd operation support
libceph: add SETXATTR/CMPXATTR osd operations support
rbd: don't treat CEPH_OSD_OP_DELETE as extent op
ceph: remove unused stringification macros
libceph: require cephx message signature by default
ceph: introduce global empty snap context
ceph: message versioning fixes
...
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 273 |
1 files changed, 262 insertions, 11 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 18c06bbaf136..f5013d92a7e6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page) struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; int err = 0; + u64 off = page_offset(page); u64 len = PAGE_CACHE_SIZE; - err = ceph_readpage_from_fscache(inode, page); + if (off >= i_size_read(inode)) { + zero_user_segment(page, err, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return 0; + } + /* + * Uptodate inline data should have been added into page cache + * while getting Fcr caps. + */ + if (ci->i_inline_version != CEPH_INLINE_NONE) + return -EINVAL; + + err = ceph_readpage_from_fscache(inode, page); if (err == 0) goto out; dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - (u64) page_offset(page), &len, + off, &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) off, len); vino = ceph_vino(inode); req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, - 1, CEPH_OSD_OP_READ, + 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); @@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, int rc = 0; int max = 0; + if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE) + return -EINVAL; + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, &nr_pages); @@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping, int rc = 0; unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; - int do_sync; + int do_sync = 0; u64 truncate_size, snap_size; u32 truncate_seq; @@ -750,7 +766,6 @@ retry: last_snapc = snapc; while (!done && index <= end) { - int num_ops = do_sync ? 2 : 1; unsigned i; int first; pgoff_t next; @@ -850,7 +865,8 @@ get_more_pages: len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, - offset, &len, num_ops, + offset, &len, 0, + do_sync ? 2 : 1, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, @@ -862,6 +878,9 @@ get_more_pages: break; } + if (do_sync) + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + req->r_callback = writepages_finish; req->r_inode = inode; @@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; + struct page *pinned_page = NULL; loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; int want, got, ret; @@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) want = CEPH_CAP_FILE_CACHE; while (1) { got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, + -1, &got, &pinned_page); if (ret == 0) break; if (ret != -ERESTARTSYS) { @@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) dout("filemap_fault %p %llu~%zd got cap refs on %s\n", inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); - ret = filemap_fault(vma, vmf); + if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || + ci->i_inline_version == CEPH_INLINE_NONE) + ret = filemap_fault(vma, vmf); + else + ret = -EAGAIN; dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); + if (pinned_page) + page_cache_release(pinned_page); ceph_put_cap_refs(ci, got); + if (ret != -EAGAIN) + return ret; + + /* read inline data */ + if (off >= PAGE_CACHE_SIZE) { + /* does not support inline data > PAGE_SIZE */ + ret = VM_FAULT_SIGBUS; + } else { + int ret1; + struct address_space *mapping = inode->i_mapping; + struct page *page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & + ~__GFP_FS); + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } + ret1 = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, true); + if (ret1 < 0 || off >= i_size_read(inode)) { + unlock_page(page); + page_cache_release(page); + ret = VM_FAULT_SIGBUS; + goto out; + } + if (ret1 < PAGE_CACHE_SIZE) + zero_user_segment(page, ret1, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + SetPageUptodate(page); + vmf->page = page; + ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; + } +out: + dout("filemap_fault %p %llu~%zd read inline data ret %d\n", + inode, off, (size_t)PAGE_CACHE_SIZE, ret); return ret; } @@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) size_t len; int want, got, ret; + if (ci->i_inline_version != CEPH_INLINE_NONE) { + struct page *locked_page = NULL; + if (off == 0) { + lock_page(page); + locked_page = page; + } + ret = ceph_uninline_data(vma->vm_file, locked_page); + if (locked_page) + unlock_page(locked_page); + if (ret < 0) + return VM_FAULT_SIGBUS; + } + if (off + PAGE_CACHE_SIZE <= size) len = PAGE_CACHE_SIZE; else @@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) want = CEPH_CAP_FILE_BUFFER; while (1) { got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, + &got, NULL); if (ret == 0) break; if (ret != -ERESTARTSYS) { @@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; } out: - if (ret != VM_FAULT_LOCKED) { + if (ret != VM_FAULT_LOCKED) unlock_page(page); - } else { + if (ret == VM_FAULT_LOCKED || + ci->i_inline_version != CEPH_INLINE_NONE) { int dirty; spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); spin_unlock(&ci->i_ceph_lock); if (dirty) @@ -1315,6 +1394,178 @@ out: return ret; } +void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + if (locked_page) { + page = locked_page; + } else { + if (i_size_read(inode) == 0) + return; + page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & ~__GFP_FS); + if (!page) + return; + if (PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return; + } + } + + dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", + inode, ceph_vinop(inode), len, locked_page); + + if (len > 0) { + void *kaddr = kmap_atomic(page); + memcpy(kaddr, data, len); + kunmap_atomic(kaddr); + } + + if (page != locked_page) { + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + } +} + +int ceph_uninline_data(struct file *filp, struct page *locked_page) +{ + struct inode *inode = file_inode(filp); + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_osd_request *req; + struct page *page = NULL; + u64 len, inline_version; + int err = 0; + bool from_pagecache = false; + + spin_lock(&ci->i_ceph_lock); + inline_version = ci->i_inline_version; + spin_unlock(&ci->i_ceph_lock); + + dout("uninline_data %p %llx.%llx inline_version %llu\n", + inode, ceph_vinop(inode), inline_version); + + if (inline_version == 1 || /* initial version, no data */ + inline_version == CEPH_INLINE_NONE) + goto out; + + if (locked_page) { + page = locked_page; + WARN_ON(!PageUptodate(page)); + } else if (ceph_caps_issued(ci) & + (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { + page = find_get_page(inode->i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + from_pagecache = true; + lock_page(page); + } else { + page_cache_release(page); + page = NULL; + } + } + } + + if (page) { + len = i_size_read(inode); + if (len > PAGE_CACHE_SIZE) + len = PAGE_CACHE_SIZE; + } else { + page = __page_cache_alloc(GFP_NOFS); + if (!page) { + err = -ENOMEM; + goto out; + } + err = __ceph_do_getattr(inode, page, + CEPH_STAT_CAP_INLINE_DATA, true); + if (err < 0) { + /* no inline data */ + if (err == -ENODATA) + err = 0; + goto out; + } + len = err; + } + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + ceph_vino(inode), 0, &len, 0, 1, + CEPH_OSD_OP_CREATE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + ci->i_snap_realm->cached_context, + 0, 0, false); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + + ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, req); + ceph_osdc_put_request(req); + if (err < 0) + goto out; + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + ceph_vino(inode), 0, &len, 1, 3, + CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + ci->i_snap_realm->cached_context, + ci->i_truncate_seq, ci->i_truncate_size, + false); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + + osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); + + err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, + "inline_version", &inline_version, + sizeof(inline_version), + CEPH_OSD_CMPXATTR_OP_GT, + CEPH_OSD_CMPXATTR_MODE_U64); + if (err) + goto out_put; + + err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, + "inline_version", &inline_version, + sizeof(inline_version), 0, 0); + if (err) + goto out_put; + + ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, req, false); + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, req); +out_put: + ceph_osdc_put_request(req); + if (err == -ECANCELED) + err = 0; +out: + if (page && page != locked_page) { + if (from_pagecache) { + unlock_page(page); + page_cache_release(page); + } else + __free_pages(page, 0); + } + + dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", + inode, ceph_vinop(inode), inline_version, err); + return err; +} + static struct vm_operations_struct ceph_vmops = { .fault = ceph_filemap_fault, .page_mkwrite = ceph_page_mkwrite, |