From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/cifs/file.c | 1184 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 917 insertions(+), 267 deletions(-) diff --git a/kernel/fs/cifs/file.c b/kernel/fs/cifs/file.c index 7b48248..a567382 100644 --- a/kernel/fs/cifs/file.c +++ b/kernel/fs/cifs/file.c @@ -33,6 +33,7 @@ #include <linux/mount.h> #include <linux/slab.h> #include <linux/swap.h> +#include <linux/mm.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -222,9 +223,6 @@ if (!buf) return -ENOMEM; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -235,7 +233,7 @@ oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; @@ -246,6 +244,7 @@ if (rc) goto out; + /* TODO: Add support for calling posix query info but with passing in fid */ if (tcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, xid); @@ -288,6 +287,8 @@ msleep(10); } +static void cifsFileInfo_put_work(struct work_struct *work); + struct cifsFileInfo * cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -322,6 +323,7 @@ cfile->invalidHandle = false; cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); + INIT_WORK(&cfile->put, cifsFileInfo_put_work); mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); @@ -349,6 +351,7 @@ server->ops->set_fid(cfile, fid, oplock); list_add(&cfile->tlist, &tcon->openFileList); + atomic_inc(&tcon->num_local_opens); /* if readable file instance put first in list*/ spin_lock(&cinode->open_file_lock); @@ -375,6 +378,41 @@ return cifs_file; } +static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) +{ + struct inode *inode = d_inode(cifs_file->dentry); + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifsLockInfo *li, *tmp; + struct super_block *sb = inode->i_sb; + + /* + * Delete any outstanding lock records. We'll lose them when the file + * is closed anyway. + */ + cifs_down_write(&cifsi->lock_sem); + list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { + list_del(&li->llist); + cifs_del_lock_waiters(li); + kfree(li); + } + list_del(&cifs_file->llist->llist); + kfree(cifs_file->llist); + up_write(&cifsi->lock_sem); + + cifs_put_tlink(cifs_file->tlink); + dput(cifs_file->dentry); + cifs_sb_deactive(sb); + kfree(cifs_file); +} + +static void cifsFileInfo_put_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, put); + + cifsFileInfo_put_final(cifs_file); +} + /** * cifsFileInfo_put - release a reference of file priv data * @@ -382,15 +420,15 @@ */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { - _cifsFileInfo_put(cifs_file, true); + _cifsFileInfo_put(cifs_file, true, true); } /** * _cifsFileInfo_put - release a reference of file priv data * * This may involve closing the filehandle @cifs_file out on the - * server. Must be called without holding tcon->open_file_lock and - * cifs_file->file_info_lock. + * server. Must be called without holding tcon->open_file_lock, + * cinode->open_file_lock and cifs_file->file_info_lock. * * If @wait_for_oplock_handler is true and we are releasing the last * reference, wait for any running oplock break handler of the file @@ -398,7 +436,8 @@ * oplock break handler, you need to pass false. * */ -void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + bool wait_oplock_handler, bool offload) { struct inode *inode = d_inode(cifs_file->dentry); struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); @@ -406,7 +445,6 @@ struct cifsInodeInfo *cifsi = CIFS_I(inode); struct super_block *sb = inode->i_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - struct cifsLockInfo *li, *tmp; struct cifs_fid fid; struct cifs_pending_open open; bool oplock_break_cancelled; @@ -431,6 +469,7 @@ /* remove it from the lists */ list_del(&cifs_file->flist); list_del(&cifs_file->tlist); + atomic_dec(&tcon->num_local_opens); if (list_empty(&cifsi->openFileList)) { cifs_dbg(FYI, "closing last open instance for inode %p\n", @@ -456,7 +495,9 @@ unsigned int xid; xid = get_xid(); - if (server->ops->close) + if (server->ops->close_getattr) + server->ops->close_getattr(xid, tcon, cifs_file); + else if (server->ops->close) server->ops->close(xid, tcon, &cifs_file->fid); _free_xid(xid); } @@ -466,24 +507,10 @@ cifs_del_pending_open(&open); - /* - * Delete any outstanding lock records. We'll lose them when the file - * is closed anyway. - */ - cifs_down_write(&cifsi->lock_sem); - list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { - list_del(&li->llist); - cifs_del_lock_waiters(li); - kfree(li); - } - list_del(&cifs_file->llist->llist); - kfree(cifs_file->llist); - up_write(&cifsi->lock_sem); - - cifs_put_tlink(cifs_file->tlink); - dput(cifs_file->dentry); - cifs_sb_deactive(sb); - kfree(cifs_file); + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); } int cifs_open(struct inode *inode, struct file *file) @@ -724,9 +751,6 @@ desired_access = cifs_convert_flags(cfile->f_flags); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -740,7 +764,7 @@ oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = &cfile->fid; @@ -778,7 +802,9 @@ if (!is_interrupt_error(rc)) mapping_set_error(inode->i_mapping, rc); - if (tcon->unix_ext) + if (tcon->posix_extensions) + rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid); + else if (tcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, xid); else @@ -814,7 +840,7 @@ int cifs_close(struct inode *inode, struct file *file) { if (file->private_data != NULL) { - cifsFileInfo_put(file->private_data); + _cifsFileInfo_put(file->private_data, true, false); file->private_data = NULL; } @@ -835,7 +861,7 @@ tcon->need_reopen_files = false; - cifs_dbg(FYI, "Reopen persistent handles"); + cifs_dbg(FYI, "Reopen persistent handles\n"); INIT_LIST_HEAD(&tmp_list); /* list all files open on tree connection, reopen resilient handles */ @@ -910,7 +936,7 @@ } static struct cifsLockInfo * -cifs_lock_init(__u64 offset, __u64 length, __u8 type) +cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags) { struct cifsLockInfo *lock = kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); @@ -920,6 +946,7 @@ lock->length = length; lock->type = type; lock->pid = current->tgid; + lock->flags = flags; INIT_LIST_HEAD(&lock->blist); init_waitqueue_head(&lock->block_q); return lock; @@ -942,7 +969,8 @@ /* @rw_check : 0 - no op, 1 - read, 2 - write */ static bool cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, - __u64 length, __u8 type, struct cifsFileInfo *cfile, + __u64 length, __u8 type, __u16 flags, + struct cifsFileInfo *cfile, struct cifsLockInfo **conf_lock, int rw_check) { struct cifsLockInfo *li; @@ -964,6 +992,10 @@ ((server->ops->compare_fids(cfile, cur_cfile) && current->tgid == li->pid) || type == li->type)) continue; + if (rw_check == CIFS_LOCK_OP && + (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) && + server->ops->compare_fids(cfile, cur_cfile)) + continue; if (conf_lock) *conf_lock = li; return true; @@ -973,8 +1005,8 @@ bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, - __u8 type, struct cifsLockInfo **conf_lock, - int rw_check) + __u8 type, __u16 flags, + struct cifsLockInfo **conf_lock, int rw_check) { bool rc = false; struct cifs_fid_locks *cur; @@ -982,7 +1014,8 @@ list_for_each_entry(cur, &cinode->llist, llist) { rc = cifs_find_fid_lock_conflict(cur, offset, length, type, - cfile, conf_lock, rw_check); + flags, cfile, conf_lock, + rw_check); if (rc) break; } @@ -1010,7 +1043,8 @@ down_read(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, offset, length, type, - &conf_lock, CIFS_LOCK_OP); + flock->fl_flags, &conf_lock, + CIFS_LOCK_OP); if (exist) { flock->fl_start = conf_lock->offset; flock->fl_end = conf_lock->offset + conf_lock->length - 1; @@ -1041,7 +1075,7 @@ * Set the byte-range lock (mandatory style). Returns: * 1) 0, if we set the lock and don't need to request to the server; * 2) 1, if no locks prevent us but we need to request to the server; - * 3) -EACCESS, if there is a lock that prevents us and wait is false. + * 3) -EACCES, if there is a lock that prevents us and wait is false. */ static int cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, @@ -1057,7 +1091,8 @@ cifs_down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, - lock->type, &conf_lock, CIFS_LOCK_OP); + lock->type, lock->flags, &conf_lock, + CIFS_LOCK_OP); if (!exist && cinode->can_cache_brlcks) { list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); @@ -1115,20 +1150,20 @@ /* * Set the byte-range lock (posix style). Returns: - * 1) 0, if we set the lock and don't need to request to the server; - * 2) 1, if we need to request to the server; - * 3) <0, if the error occurs while setting the lock. + * 1) <0, if the error occurs while setting the lock; + * 2) 0, if we set the lock and don't need to request to the server; + * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; + * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. */ static int cifs_posix_lock_set(struct file *file, struct file_lock *flock) { struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); - int rc = 1; + int rc = FILE_LOCK_DEFERRED + 1; if ((flock->fl_flags & FL_POSIX) == 0) return rc; -try_again: cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); @@ -1137,12 +1172,6 @@ rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); - if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); - if (!rc) - goto try_again; - posix_unblock_lock(flock); - } return rc; } @@ -1371,7 +1400,7 @@ cifs_dbg(FYI, "Lease on file - not implemented yet\n"); if (flock->fl_flags & (~(FL_POSIX | FL_FLOCK | FL_SLEEP | - FL_ACCESS | FL_LEASE | FL_CLOSE))) + FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); *type = server->vals->large_lock_type; @@ -1617,7 +1646,7 @@ int posix_lock_type; rc = cifs_posix_lock_set(file, flock); - if (!rc || rc < 0) + if (rc <= FILE_LOCK_DEFERRED) return rc; if (type & server->vals->shared_lock_type) @@ -1638,7 +1667,8 @@ if (lock) { struct cifsLockInfo *lock; - lock = cifs_lock_init(flock->fl_start, length, type); + lock = cifs_lock_init(flock->fl_start, length, type, + flock->fl_flags); if (!lock) return -ENOMEM; @@ -1677,7 +1707,7 @@ rc = server->ops->mand_unlock_range(cfile, flock, xid); out: - if (flock->fl_flags & FL_POSIX) { + if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { /* * If this is a request to remove all locks because we * are closing the file, it doesn't matter if the @@ -1694,6 +1724,55 @@ return rc; } +int cifs_flock(struct file *file, int cmd, struct file_lock *fl) +{ + int rc, xid; + int lock = 0, unlock = 0; + bool wait_flag = false; + bool posix_lck = false; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *cfile; + __u32 type; + + xid = get_xid(); + + if (!(fl->fl_flags & FL_FLOCK)) { + rc = -ENOLCK; + free_xid(xid); + return rc; + } + + cfile = (struct cifsFileInfo *)file->private_data; + tcon = tlink_tcon(cfile->tlink); + + cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, + tcon->ses->server); + cifs_sb = CIFS_FILE_SB(file); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + posix_lck = true; + + if (!lock && !unlock) { + /* + * if no lock or unlock then nothing to do since we do not + * know what it is + */ + rc = -EOPNOTSUPP; + free_xid(xid); + return rc; + } + + rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, + xid); + free_xid(xid); + return rc; + + +} + int cifs_lock(struct file *file, int cmd, struct file_lock *flock) { int rc, xid; @@ -1702,9 +1781,7 @@ bool posix_lck = false; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - struct cifsInodeInfo *cinode; struct cifsFileInfo *cfile; - __u16 netfid; __u32 type; rc = -EACCES; @@ -1719,10 +1796,7 @@ cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_FILE_SB(file); - netfid = cfile->fid.netfid; - cinode = CIFS_I(file_inode(file)); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && @@ -1774,15 +1848,12 @@ int rc = 0; unsigned int bytes_written = 0; unsigned int total_written; - struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; struct TCP_Server_Info *server; unsigned int xid; struct dentry *dentry = open_file->dentry; struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); - struct cifs_io_parms io_parms; - - cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_io_parms io_parms = {0}; cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", write_size, *offset, dentry); @@ -1886,23 +1957,30 @@ return NULL; } -struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only) +/* Return -EBADF if no handle is found and general rc otherwise */ +int +cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, + struct cifsFileInfo **ret_file) { struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; bool any_available = false; - int rc; + int rc = -EBADF; unsigned int refind = 0; + bool fsuid_only = flags & FIND_WR_FSUID_ONLY; + bool with_delete = flags & FIND_WR_WITH_DELETE; + *ret_file = NULL; - /* Having a null inode here (because mapping->host was set to zero by - the VFS or MM) should not happen but we had reports of on oops (due to - it being zero) during stress testcases so we need to check for it */ + /* + * Having a null inode here (because mapping->host was set to zero by + * the VFS or MM) should not happen but we had reports of on oops (due + * to it being zero) during stress testcases so we need to check for it + */ if (cifs_inode == NULL) { cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n"); dump_stack(); - return NULL; + return rc; } cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); @@ -1915,19 +1993,22 @@ refind_writable: if (refind > MAX_REOPEN_ATT) { spin_unlock(&cifs_inode->open_file_lock); - return NULL; + return rc; } list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (!any_available && open_file->pid != current->tgid) continue; if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; + if (with_delete && !(open_file->fid.access & DELETE)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ cifsFileInfo_get(open_file); spin_unlock(&cifs_inode->open_file_lock); - return open_file; + *ret_file = open_file; + return 0; } else { if (!inv_file) inv_file = open_file; @@ -1949,22 +2030,107 @@ if (inv_file) { rc = cifs_reopen_file(inv_file, false); - if (!rc) - return inv_file; - else { - spin_lock(&cifs_inode->open_file_lock); - list_move_tail(&inv_file->flist, - &cifs_inode->openFileList); - spin_unlock(&cifs_inode->open_file_lock); - cifsFileInfo_put(inv_file); - ++refind; - inv_file = NULL; - spin_lock(&cifs_inode->open_file_lock); - goto refind_writable; + if (!rc) { + *ret_file = inv_file; + return 0; } + + spin_lock(&cifs_inode->open_file_lock); + list_move_tail(&inv_file->flist, &cifs_inode->openFileList); + spin_unlock(&cifs_inode->open_file_lock); + cifsFileInfo_put(inv_file); + ++refind; + inv_file = NULL; + spin_lock(&cifs_inode->open_file_lock); + goto refind_writable; } - return NULL; + return rc; +} + +struct cifsFileInfo * +find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) +{ + struct cifsFileInfo *cfile; + int rc; + + rc = cifs_get_writable_file(cifs_inode, flags, &cfile); + if (rc) + cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); + + return cfile; +} + +int +cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, + struct cifsFileInfo **ret_file) +{ + struct list_head *tmp; + struct cifsFileInfo *cfile; + struct cifsInodeInfo *cinode; + char *full_path; + + *ret_file = NULL; + + spin_lock(&tcon->open_file_lock); + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, + tlist); + full_path = build_path_from_dentry(cfile->dentry); + if (full_path == NULL) { + spin_unlock(&tcon->open_file_lock); + return -ENOMEM; + } + if (strcmp(full_path, name)) { + kfree(full_path); + continue; + } + + kfree(full_path); + cinode = CIFS_I(d_inode(cfile->dentry)); + spin_unlock(&tcon->open_file_lock); + return cifs_get_writable_file(cinode, flags, ret_file); + } + + spin_unlock(&tcon->open_file_lock); + return -ENOENT; +} + +int +cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, + struct cifsFileInfo **ret_file) +{ + struct list_head *tmp; + struct cifsFileInfo *cfile; + struct cifsInodeInfo *cinode; + char *full_path; + + *ret_file = NULL; + + spin_lock(&tcon->open_file_lock); + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, + tlist); + full_path = build_path_from_dentry(cfile->dentry); + if (full_path == NULL) { + spin_unlock(&tcon->open_file_lock); + return -ENOMEM; + } + if (strcmp(full_path, name)) { + kfree(full_path); + continue; + } + + kfree(full_path); + cinode = CIFS_I(d_inode(cfile->dentry)); + spin_unlock(&tcon->open_file_lock); + *ret_file = find_readable_file(cinode, 0); + return *ret_file ? 0 : -ENOENT; + } + + spin_unlock(&tcon->open_file_lock); + return -ENOENT; } static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) @@ -2001,8 +2167,9 @@ if (mapping->host->i_size - offset < (loff_t)to) to = (unsigned)(mapping->host->i_size - offset); - open_file = find_writable_file(CIFS_I(mapping->host), false); - if (open_file) { + rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, + &open_file); + if (!rc) { bytes_written = cifs_write(open_file, open_file->pid, write_data, to - from, &offset); cifsFileInfo_put(open_file); @@ -2012,9 +2179,12 @@ rc = 0; else if (bytes_written < 0) rc = bytes_written; + else + rc = -EFAULT; } else { - cifs_dbg(FYI, "No writeable filehandles for inode\n"); - rc = -EIO; + cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); + if (!is_retryable_error(rc)) + rc = -EIO; } kunmap(page); @@ -2121,9 +2291,7 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, struct address_space *mapping, struct writeback_control *wbc) { - int rc = 0; - struct TCP_Server_Info *server; - unsigned int i; + int rc; wdata->sync_mode = wbc->sync_mode; wdata->nr_pages = nr_pages; @@ -2133,21 +2301,17 @@ page_offset(wdata->pages[nr_pages - 1]), (loff_t)PAGE_SIZE); wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz; + wdata->pid = wdata->cfile->pid; - if (wdata->cfile != NULL) - cifsFileInfo_put(wdata->cfile); - wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); - if (!wdata->cfile) { - cifs_dbg(VFS, "No writable handles for inode\n"); - rc = -EBADF; - } else { - wdata->pid = wdata->cfile->pid; - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - rc = server->ops->async_writev(wdata, cifs_writedata_release); - } + rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); + if (rc) + return rc; - for (i = 0; i < nr_pages; ++i) - unlock_page(wdata->pages[i]); + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = wdata->server->ops->async_writev(wdata, + cifs_writedata_release); return rc; } @@ -2155,13 +2319,16 @@ static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); + struct inode *inode = mapping->host; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct TCP_Server_Info *server; bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; + struct cifsFileInfo *cfile = NULL; int rc = 0; int saved_rc = 0; + unsigned int xid; /* * If wsize is smaller than the page cache size, default to writing @@ -2170,6 +2337,7 @@ if (cifs_sb->wsize < PAGE_SIZE) return generic_writepages(mapping, wbc); + xid = get_xid(); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; @@ -2180,14 +2348,27 @@ range_whole = true; scanned = true; } - server = cifs_sb_master_tcon(cifs_sb)->ses->server; + server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); + retry: while (!done && index <= end) { - unsigned int i, nr_pages, found_pages, wsize, credits; + unsigned int i, nr_pages, found_pages, wsize; pgoff_t next = 0, tofind, saved_index = index; + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + int get_file_rc = 0; + + if (cfile) + cifsFileInfo_put(cfile); + + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + + /* in case of an error store it to return later */ + if (rc) + get_file_rc = rc; rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, - &wsize, &credits); + &wsize, credits); if (rc != 0) { done = true; break; @@ -2220,13 +2401,27 @@ continue; } - wdata->credits = credits; + wdata->credits = credits_on_stack; + wdata->cfile = cfile; + wdata->server = server; + cfile = NULL; - rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); + if (!wdata->cfile) { + cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", + get_file_rc); + if (is_retryable_error(get_file_rc)) + rc = get_file_rc; + else + rc = -EBADF; + } else + rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); + + for (i = 0; i < nr_pages; ++i) + unlock_page(wdata->pages[i]); /* send failure -- clean up the mess */ if (rc != 0) { - add_credits_and_wake_if(server, wdata->credits, 0); + add_credits_and_wake_if(server, &wdata->credits, 0); for (i = 0; i < nr_pages; ++i) { if (is_retryable_error(rc)) redirty_page_for_writepage(wbc, @@ -2278,6 +2473,9 @@ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; + if (cfile) + cifsFileInfo_put(cfile); + free_xid(xid); return rc; } @@ -2402,9 +2600,10 @@ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(inode->i_ino, rc); return rc; - inode_lock(inode); + } xid = get_xid(); @@ -2422,14 +2621,24 @@ tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; - if (server->ops->flush) - rc = server->ops->flush(xid, tcon, &smbfile->fid); - else + if (server->ops->flush == NULL) { rc = -ENOSYS; + goto strict_fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); } +strict_fsync_exit: free_xid(xid); - inode_unlock(inode); return rc; } @@ -2440,13 +2649,14 @@ struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; + struct inode *inode = file_inode(file); struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); - struct inode *inode = file->f_mapping->host; rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(file_inode(file)->i_ino, rc); return rc; - inode_lock(inode); + } xid = get_xid(); @@ -2456,14 +2666,24 @@ tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; - if (server->ops->flush) - rc = server->ops->flush(xid, tcon, &smbfile->fid); - else + if (server->ops->flush == NULL) { rc = -ENOSYS; + goto fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); } +fsync_exit: free_xid(xid); - inode_unlock(inode); return rc; } @@ -2480,7 +2700,8 @@ rc = filemap_write_and_wait(inode->i_mapping); cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); - + if (rc) + trace_cifs_flush_err(inode->i_ino, rc); return rc; } @@ -2603,6 +2824,76 @@ } static int +cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, + struct cifs_aio_ctx *ctx) +{ + unsigned int wsize; + struct cifs_credits credits; + int rc; + struct TCP_Server_Info *server = wdata->server; + + do { + if (wdata->cfile->invalidHandle) { + rc = cifs_reopen_file(wdata->cfile, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + + /* + * Wait for credits to resend this wdata. + * Note: we are attempting to resend the whole wdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, wdata->bytes, + &wsize, &credits); + if (rc) + goto fail; + + if (wsize < wdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (wsize < wdata->bytes); + wdata->credits = credits; + + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else { +#ifdef CONFIG_CIFS_SMB_DIRECT + if (wdata->mr) { + wdata->mr->need_invalidate = true; + smbd_deregister_mr(wdata->mr); + wdata->mr = NULL; + } +#endif + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); + } + } + + /* If the write was successfully sent, we are done */ + if (!rc) { + list_add_tail(&wdata->list, wdata_list); + return 0; + } + + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &wdata->credits, 0); + } while (rc == -EAGAIN); + +fail: + kref_put(&wdata->refcount, cifs_uncached_writedata_release); + return rc; +} + +static int cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, @@ -2616,73 +2907,139 @@ loff_t saved_offset = offset; pid_t pid; struct TCP_Server_Info *server; + struct page **pagevec; + size_t start; + unsigned int xid; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - server = tlink_tcon(open_file->tlink)->ses->server; + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); + xid = get_xid(); do { - unsigned int wsize, credits; + unsigned int wsize; + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, - &wsize, &credits); + &wsize, credits); if (rc) break; - nr_pages = get_numpages(wsize, len, &cur_len); - wdata = cifs_writedata_alloc(nr_pages, + cur_len = min_t(const size_t, len, wsize); + + if (ctx->direct_io) { + ssize_t result; + + result = iov_iter_get_pages_alloc( + from, &pagevec, cur_len, &start); + if (result < 0) { + cifs_dbg(VFS, + "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n", + result, iov_iter_type(from), + from->iov_offset, from->count); + dump_stack(); + + rc = result; + add_credits_and_wake_if(server, credits, 0); + break; + } + cur_len = (size_t)result; + iov_iter_advance(from, cur_len); + + nr_pages = + (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; + + wdata = cifs_writedata_direct_alloc(pagevec, cifs_uncached_writev_complete); - if (!wdata) { - rc = -ENOMEM; - add_credits_and_wake_if(server, credits, 0); - break; - } + if (!wdata) { + rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); + break; + } - rc = cifs_write_allocate_pages(wdata->pages, nr_pages); - if (rc) { - kfree(wdata); - add_credits_and_wake_if(server, credits, 0); - break; - } - num_pages = nr_pages; - rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); - if (rc) { - for (i = 0; i < nr_pages; i++) - put_page(wdata->pages[i]); - kfree(wdata); - add_credits_and_wake_if(server, credits, 0); - break; - } + wdata->page_offset = start; + wdata->tailsz = + nr_pages > 1 ? + cur_len - (PAGE_SIZE - start) - + (nr_pages - 2) * PAGE_SIZE : + cur_len; + } else { + nr_pages = get_numpages(wsize, len, &cur_len); + wdata = cifs_writedata_alloc(nr_pages, + cifs_uncached_writev_complete); + if (!wdata) { + rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); + break; + } - /* - * Bring nr_pages down to the number of pages we actually used, - * and free any pages that we didn't use. - */ - for ( ; nr_pages > num_pages; nr_pages--) - put_page(wdata->pages[nr_pages - 1]); + rc = cifs_write_allocate_pages(wdata->pages, nr_pages); + if (rc) { + kvfree(wdata->pages); + kfree(wdata); + add_credits_and_wake_if(server, credits, 0); + break; + } + + num_pages = nr_pages; + rc = wdata_fill_from_iovec( + wdata, from, &cur_len, &num_pages); + if (rc) { + for (i = 0; i < nr_pages; i++) + put_page(wdata->pages[i]); + kvfree(wdata->pages); + kfree(wdata); + add_credits_and_wake_if(server, credits, 0); + break; + } + + /* + * Bring nr_pages down to the number of pages we + * actually used, and free any pages that we didn't use. + */ + for ( ; nr_pages > num_pages; nr_pages--) + put_page(wdata->pages[nr_pages - 1]); + + wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); + } wdata->sync_mode = WB_SYNC_ALL; wdata->nr_pages = nr_pages; wdata->offset = (__u64)offset; wdata->cfile = cifsFileInfo_get(open_file); + wdata->server = server; wdata->pid = pid; wdata->bytes = cur_len; wdata->pagesz = PAGE_SIZE; - wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); - wdata->credits = credits; + wdata->credits = credits_on_stack; wdata->ctx = ctx; kref_get(&ctx->refcount); - if (!wdata->cfile->invalidHandle || - !(rc = cifs_reopen_file(wdata->cfile, false))) - rc = server->ops->async_writev(wdata, + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); + } + if (rc) { - add_credits_and_wake_if(server, wdata->credits, 0); + add_credits_and_wake_if(server, &wdata->credits, 0); kref_put(&wdata->refcount, cifs_uncached_writedata_release); if (rc == -EAGAIN) { @@ -2698,6 +3055,7 @@ len -= cur_len; } while (len > 0); + free_xid(xid); return rc; } @@ -2707,8 +3065,7 @@ struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct dentry *dentry = ctx->cfile->dentry; - unsigned int i; - int rc; + ssize_t rc; tcon = tlink_tcon(ctx->cfile->tlink); cifs_sb = CIFS_SB(dentry->d_sb); @@ -2747,27 +3104,29 @@ INIT_LIST_HEAD(&tmp_list); list_del_init(&wdata->list); - iov_iter_advance(&tmp_from, + if (ctx->direct_io) + rc = cifs_resend_wdata( + wdata, &tmp_list, ctx); + else { + iov_iter_advance(&tmp_from, wdata->offset - ctx->pos); - rc = cifs_write_from_iter(wdata->offset, + rc = cifs_write_from_iter(wdata->offset, wdata->bytes, &tmp_from, ctx->cfile, cifs_sb, &tmp_list, ctx); - list_splice(&tmp_list, &ctx->list); + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); + } - kref_put(&wdata->refcount, - cifs_uncached_writedata_release); + list_splice(&tmp_list, &ctx->list); goto restart_loop; } } list_del_init(&wdata->list); kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - - for (i = 0; i < ctx->npages; i++) - put_page(ctx->bv[i].bv_page); cifs_stats_bytes_written(tcon, ctx->total_len); set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); @@ -2782,7 +3141,8 @@ complete(&ctx->done); } -ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +static ssize_t __cifs_writev( + struct kiocb *iocb, struct iov_iter *from, bool direct) { struct file *file = iocb->ki_filp; ssize_t total_written = 0; @@ -2791,13 +3151,18 @@ struct cifs_sb_info *cifs_sb; struct cifs_aio_ctx *ctx; struct iov_iter saved_from = *from; + size_t len = iov_iter_count(from); int rc; /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. + * iov_iter_get_pages_alloc doesn't work with ITER_KVEC. + * In this case, fall back to non-direct write function. + * this could be improved by getting pages directly in ITER_KVEC */ + if (direct && iov_iter_is_kvec(from)) { + cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n"); + direct = false; + } rc = generic_write_checks(iocb, from); if (rc <= 0) @@ -2821,10 +3186,16 @@ ctx->pos = iocb->ki_pos; - rc = setup_aio_ctx_iter(ctx, from, WRITE); - if (rc) { - kref_put(&ctx->refcount, cifs_aio_ctx_release); - return rc; + if (direct) { + ctx->direct_io = true; + ctx->iter = *from; + ctx->len = len; + } else { + rc = setup_aio_ctx_iter(ctx, from, WRITE); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } } /* grab a lock here due to read response handlers can access ctx */ @@ -2874,6 +3245,19 @@ return total_written; } +ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + + cifs_revalidate_mapping(file->f_inode); + return __cifs_writev(iocb, from, true); +} + +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +{ + return __cifs_writev(iocb, from, false); +} + static ssize_t cifs_writev(struct kiocb *iocb, struct iov_iter *from) { @@ -2896,8 +3280,8 @@ goto out; if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), - server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) + server->vals->exclusive_lock_type, 0, + NULL, CIFS_WRITE_OP)) rc = __generic_file_write_iter(iocb, from); else rc = -EACCES; @@ -3048,7 +3432,6 @@ kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); for (i = 0; i < rdata->nr_pages; i++) { put_page(rdata->pages[i]); - rdata->pages[i] = NULL; } cifs_readdata_release(refcount); } @@ -3073,7 +3456,7 @@ size_t copy = min_t(size_t, remaining, PAGE_SIZE); size_t written; - if (unlikely(iter->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(iter))) { void *addr = kmap_atomic(page); written = copy_to_iter(addr, copy, iter); @@ -3156,7 +3539,7 @@ rdata->got_bytes += result; } - return rdata->got_bytes > 0 && result != -ECONNABORTED ? + return result != -ECONNABORTED && rdata->got_bytes > 0 ? rdata->got_bytes : result; } @@ -3175,70 +3558,209 @@ return uncached_fill_pages(server, rdata, iter, iter->count); } +static int cifs_resend_rdata(struct cifs_readdata *rdata, + struct list_head *rdata_list, + struct cifs_aio_ctx *ctx) +{ + unsigned int rsize; + struct cifs_credits credits; + int rc; + struct TCP_Server_Info *server; + + /* XXX: should we pick a new channel here? */ + server = rdata->server; + + do { + if (rdata->cfile->invalidHandle) { + rc = cifs_reopen_file(rdata->cfile, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + /* + * Wait for credits to resend this rdata. + * Note: we are attempting to resend the whole rdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, rdata->bytes, + &rsize, &credits); + + if (rc) + goto fail; + + if (rsize < rdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (rsize < rdata->bytes); + rdata->credits = credits; + + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else { +#ifdef CONFIG_CIFS_SMB_DIRECT + if (rdata->mr) { + rdata->mr->need_invalidate = true; + smbd_deregister_mr(rdata->mr); + rdata->mr = NULL; + } +#endif + rc = server->ops->async_readv(rdata); + } + } + + /* If the read was successfully sent, we are done */ + if (!rc) { + /* Add to aio pending list */ + list_add_tail(&rdata->list, rdata_list); + return 0; + } + + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &rdata->credits, 0); + } while (rc == -EAGAIN); + +fail: + kref_put(&rdata->refcount, cifs_uncached_readdata_release); + return rc; +} + static int cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, struct cifs_aio_ctx *ctx) { struct cifs_readdata *rdata; - unsigned int npages, rsize, credits; + unsigned int npages, rsize; + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; size_t cur_len; int rc; pid_t pid; struct TCP_Server_Info *server; + struct page **pagevec; + size_t start; + struct iov_iter direct_iov = ctx->iter; - server = tlink_tcon(open_file->tlink)->ses->server; + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; + if (ctx->direct_io) + iov_iter_advance(&direct_iov, offset - ctx->pos); + do { + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, - &rsize, &credits); + &rsize, credits); if (rc) break; cur_len = min_t(const size_t, len, rsize); - npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); - /* allocate a readdata struct */ - rdata = cifs_readdata_alloc(npages, + if (ctx->direct_io) { + ssize_t result; + + result = iov_iter_get_pages_alloc( + &direct_iov, &pagevec, + cur_len, &start); + if (result < 0) { + cifs_dbg(VFS, + "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n", + result, iov_iter_type(&direct_iov), + direct_iov.iov_offset, + direct_iov.count); + dump_stack(); + + rc = result; + add_credits_and_wake_if(server, credits, 0); + break; + } + cur_len = (size_t)result; + iov_iter_advance(&direct_iov, cur_len); + + rdata = cifs_readdata_direct_alloc( + pagevec, cifs_uncached_readv_complete); + if (!rdata) { + add_credits_and_wake_if(server, credits, 0); + rc = -ENOMEM; + break; + } + + npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE; + rdata->page_offset = start; + rdata->tailsz = npages > 1 ? + cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE : + cur_len; + + } else { + + npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); + /* allocate a readdata struct */ + rdata = cifs_readdata_alloc(npages, cifs_uncached_readv_complete); - if (!rdata) { - add_credits_and_wake_if(server, credits, 0); - rc = -ENOMEM; - break; + if (!rdata) { + add_credits_and_wake_if(server, credits, 0); + rc = -ENOMEM; + break; + } + + rc = cifs_read_allocate_pages(rdata, npages); + if (rc) { + kvfree(rdata->pages); + kfree(rdata); + add_credits_and_wake_if(server, credits, 0); + break; + } + + rdata->tailsz = PAGE_SIZE; } - rc = cifs_read_allocate_pages(rdata, npages); - if (rc) - goto error; - + rdata->server = server; rdata->cfile = cifsFileInfo_get(open_file); rdata->nr_pages = npages; rdata->offset = offset; rdata->bytes = cur_len; rdata->pid = pid; rdata->pagesz = PAGE_SIZE; - rdata->tailsz = PAGE_SIZE; rdata->read_into_pages = cifs_uncached_read_into_pages; rdata->copy_into_pages = cifs_uncached_copy_into_pages; - rdata->credits = credits; + rdata->credits = credits_on_stack; rdata->ctx = ctx; kref_get(&ctx->refcount); - if (!rdata->cfile->invalidHandle || - !(rc = cifs_reopen_file(rdata->cfile, true))) - rc = server->ops->async_readv(rdata); -error: + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } + if (rc) { - add_credits_and_wake_if(server, rdata->credits, 0); + add_credits_and_wake_if(server, &rdata->credits, 0); kref_put(&rdata->refcount, - cifs_uncached_readdata_release); - if (rc == -EAGAIN) + cifs_uncached_readdata_release); + if (rc == -EAGAIN) { + iov_iter_revert(&direct_iov, cur_len); continue; + } break; } @@ -3256,11 +3778,8 @@ struct cifs_readdata *rdata, *tmp; struct iov_iter *to = &ctx->iter; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; - unsigned int i; int rc; - tcon = tlink_tcon(ctx->cfile->tlink); cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); mutex_lock(&ctx->aio_mutex); @@ -3294,47 +3813,55 @@ * reading. */ if (got_bytes && got_bytes < rdata->bytes) { - rc = cifs_readdata_to_iov(rdata, to); + rc = 0; + if (!ctx->direct_io) + rc = cifs_readdata_to_iov(rdata, to); if (rc) { kref_put(&rdata->refcount, - cifs_uncached_readdata_release); + cifs_uncached_readdata_release); continue; } } - rc = cifs_send_async_read( + if (ctx->direct_io) { + /* + * Re-use rdata as this is a + * direct I/O + */ + rc = cifs_resend_rdata( + rdata, + &tmp_list, ctx); + } else { + rc = cifs_send_async_read( rdata->offset + got_bytes, rdata->bytes - got_bytes, rdata->cfile, cifs_sb, &tmp_list, ctx); + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + } + list_splice(&tmp_list, &ctx->list); - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); goto again; } else if (rdata->result) rc = rdata->result; - else + else if (!ctx->direct_io) rc = cifs_readdata_to_iov(rdata, to); /* if there was a short read -- discard anything left */ if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) rc = -ENODATA; + + ctx->total_len += rdata->got_bytes; } list_del_init(&rdata->list); kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - for (i = 0; i < ctx->npages; i++) { - if (ctx->should_dirty) - set_page_dirty(ctx->bv[i].bv_page); - put_page(ctx->bv[i].bv_page); - } - - ctx->total_len = ctx->len - iov_iter_count(to); - - cifs_stats_bytes_read(tcon, ctx->total_len); + if (!ctx->direct_io) + ctx->total_len = ctx->len - iov_iter_count(to); /* mask nodata case */ if (rc == -ENODATA) @@ -3350,17 +3877,27 @@ complete(&ctx->done); } -ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +static ssize_t __cifs_readv( + struct kiocb *iocb, struct iov_iter *to, bool direct) { - struct file *file = iocb->ki_filp; - ssize_t rc; size_t len; - ssize_t total_read = 0; - loff_t offset = iocb->ki_pos; + struct file *file = iocb->ki_filp; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; struct cifsFileInfo *cfile; + struct cifs_tcon *tcon; + ssize_t rc, total_read = 0; + loff_t offset = iocb->ki_pos; struct cifs_aio_ctx *ctx; + + /* + * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, + * fall back to data copy read path + * this could be improved by getting pages directly in ITER_KVEC + */ + if (direct && iov_iter_is_kvec(to)) { + cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n"); + direct = false; + } len = iov_iter_count(to); if (!len) @@ -3385,16 +3922,31 @@ if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (to->type == ITER_IOVEC) + if (iter_is_iovec(to)) ctx->should_dirty = true; - rc = setup_aio_ctx_iter(ctx, to, READ); - if (rc) { - kref_put(&ctx->refcount, cifs_aio_ctx_release); - return rc; + if (direct) { + ctx->pos = offset; + ctx->direct_io = true; + ctx->iter = *to; + ctx->len = len; + } else { + rc = setup_aio_ctx_iter(ctx, to, READ); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + len = ctx->len; } - len = ctx->len; + if (direct) { + rc = filemap_write_and_wait_range(file->f_inode->i_mapping, + offset, offset + len - 1); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EAGAIN; + } + } /* grab a lock here due to read response handlers can access ctx */ mutex_lock(&ctx->aio_mutex); @@ -3437,6 +3989,16 @@ return rc; } +ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) +{ + return __cifs_readv(iocb, to, true); +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + return __cifs_readv(iocb, to, false); +} + ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) { @@ -3471,7 +4033,7 @@ down_read(&cinode->lock_sem); if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), tcon->ses->server->vals->shared_lock_type, - NULL, CIFS_READ_OP)) + 0, NULL, CIFS_READ_OP)) rc = generic_file_read_iter(iocb, to); up_read(&cinode->lock_sem); return rc; @@ -3491,7 +4053,7 @@ unsigned int xid; char *cur_offset; struct cifsFileInfo *open_file; - struct cifs_io_parms io_parms; + struct cifs_io_parms io_parms = {0}; int buf_type = CIFS_NO_BUFFER; __u32 pid; @@ -3508,7 +4070,7 @@ } open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); - server = tcon->ses->server; + server = cifs_pick_channel(tcon->ses); if (!server->ops->sync_read) { free_xid(xid); @@ -3547,6 +4109,7 @@ io_parms.tcon = tcon; io_parms.offset = *offset; io_parms.length = current_read_size; + io_parms.server = server; rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, &bytes_read, &cur_offset, &buf_type); @@ -3635,7 +4198,7 @@ for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; - lru_cache_add_file(page); + lru_cache_add(page); if (rdata->result == 0 || (rdata->result == -EAGAIN && got_bytes)) { @@ -3705,7 +4268,7 @@ * fill them until the writes are flushed. */ zero_user(page, 0, PAGE_SIZE); - lru_cache_add_file(page); + lru_cache_add(page); flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); @@ -3715,7 +4278,7 @@ continue; } else { /* no need to hold page hostage */ - lru_cache_add_file(page); + lru_cache_add(page); unlock_page(page); put_page(page); rdata->pages[i] = NULL; @@ -3739,7 +4302,7 @@ rdata->got_bytes += result; } - return rdata->got_bytes > 0 && result != -ECONNABORTED ? + return result != -ECONNABORTED && rdata->got_bytes > 0 ? rdata->got_bytes : result; } @@ -3770,7 +4333,7 @@ INIT_LIST_HEAD(tmplist); - page = list_entry(page_list->prev, struct page, lru); + page = lru_to_page(page_list); /* * Lock the page and put it in the cache. Since no one else @@ -3828,7 +4391,9 @@ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct TCP_Server_Info *server; pid_t pid; + unsigned int xid; + xid = get_xid(); /* * Reads as many pages as possible from fscache. Returns -ENOBUFS * immediately if the cookie is negative @@ -3838,8 +4403,10 @@ */ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, &num_pages); - if (rc == 0) + if (rc == 0) { + free_xid(xid); return rc; + } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -3847,7 +4414,7 @@ pid = current->tgid; rc = 0; - server = tlink_tcon(open_file->tlink)->ses->server; + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", __func__, file, mapping, num_pages); @@ -3868,10 +4435,19 @@ loff_t offset; struct page *page, *tpage; struct cifs_readdata *rdata; - unsigned credits; + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, - &rsize, &credits); + &rsize, credits); if (rc) break; @@ -3883,6 +4459,7 @@ */ if (unlikely(rsize < PAGE_SIZE)) { add_credits_and_wake_if(server, credits, 0); + free_xid(xid); return 0; } @@ -3899,7 +4476,7 @@ /* best to give up if we're out of mem */ list_for_each_entry_safe(page, tpage, &tmplist, lru) { list_del(&page->lru); - lru_cache_add_file(page); + lru_cache_add(page); unlock_page(page); put_page(page); } @@ -3909,6 +4486,7 @@ } rdata->cfile = cifsFileInfo_get(open_file); + rdata->server = server; rdata->mapping = mapping; rdata->offset = offset; rdata->bytes = bytes; @@ -3917,21 +4495,27 @@ rdata->tailsz = PAGE_SIZE; rdata->read_into_pages = cifs_readpages_read_into_pages; rdata->copy_into_pages = cifs_readpages_copy_into_pages; - rdata->credits = credits; + rdata->credits = credits_on_stack; list_for_each_entry_safe(page, tpage, &tmplist, lru) { list_del(&page->lru); rdata->pages[rdata->nr_pages++] = page; } - if (!rdata->cfile->invalidHandle || - !(rc = cifs_reopen_file(rdata->cfile, true))) - rc = server->ops->async_readv(rdata); + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } + if (rc) { - add_credits_and_wake_if(server, rdata->credits, 0); + add_credits_and_wake_if(server, &rdata->credits, 0); for (i = 0; i < rdata->nr_pages; i++) { page = rdata->pages[i]; - lru_cache_add_file(page); + lru_cache_add(page); unlock_page(page); put_page(page); } @@ -3948,6 +4532,7 @@ * allocator. */ cifs_fscache_readpages_cancel(mapping->host, page_list); + free_xid(xid); return rc; } @@ -3975,8 +4560,12 @@ else cifs_dbg(FYI, "Bytes read %d\n", rc); - file_inode(file)->i_atime = - current_time(file_inode(file)); + /* we do not want atime to be less than mtime, it broke some apps */ + file_inode(file)->i_atime = current_time(file_inode(file)); + if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime))) + file_inode(file)->i_atime = file_inode(file)->i_mtime; + else + file_inode(file)->i_atime = current_time(file_inode(file)); if (PAGE_SIZE > rc) memset(read_data + rc, 0, PAGE_SIZE - rc); @@ -3991,15 +4580,15 @@ io_error: kunmap(page); - unlock_page(page); read_complete: + unlock_page(page); return rc; } static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_SHIFT; + loff_t offset = page_file_offset(page); int rc = -EACCES; unsigned int xid; @@ -4236,7 +4825,7 @@ cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } - _cifsFileInfo_put(cfile, false /* do not wait for ourself */); + _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); cifs_done_oplock_break(cinode); } @@ -4259,6 +4848,60 @@ return -EINVAL; } +static int cifs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) +{ + struct cifsFileInfo *cfile = swap_file->private_data; + struct inode *inode = swap_file->f_mapping->host; + unsigned long blocks; + long long isize; + + cifs_dbg(FYI, "swap activate\n"); + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } + *span = sis->pages; + + pr_warn_once("Swap support over SMB3 is experimental\n"); + + /* + * TODO: consider adding ACL (or documenting how) to prevent other + * users (on this or other systems) from reading it + */ + + + /* TODO: add sk_set_memalloc(inet) or similar */ + + if (cfile) + cfile->swapfile = true; + /* + * TODO: Since file already open, we can't open with DENY_ALL here + * but we could add call to grab a byte range lock to prevent others + * from reading or writing the file + */ + + return 0; +} + +static void cifs_swap_deactivate(struct file *file) +{ + struct cifsFileInfo *cfile = file->private_data; + + cifs_dbg(FYI, "swap deactivate\n"); + + /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ + + if (cfile) + cfile->swapfile = false; + + /* do we need to unpin (or unlock) the file */ +} const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, @@ -4272,6 +4915,13 @@ .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, + /* + * TODO: investigate and if useful we could add an cifs_migratePage + * helper (under an CONFIG_MIGRATION) in the future, and also + * investigate and add an is_dirty_writeback helper if needed + */ + .swap_activate = cifs_swap_activate, + .swap_deactivate = cifs_swap_deactivate, }; /* -- Gitblit v1.6.2