| .. | .. |
|---|
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
|---|
| 2 | 2 | /* |
|---|
| 3 | 3 | * (C) 2001 Clemson University and The University of Chicago |
|---|
| 4 | + * Copyright 2018 Omnibond Systems, L.L.C. |
|---|
| 4 | 5 | * |
|---|
| 5 | 6 | * See COPYING in top-level directory. |
|---|
| 6 | 7 | */ |
|---|
| .. | .. |
|---|
| 44 | 45 | /* |
|---|
| 45 | 46 | * Post and wait for the I/O upcall to finish |
|---|
| 46 | 47 | */ |
|---|
| 47 | | -static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, |
|---|
| 48 | | - loff_t *offset, struct iov_iter *iter, |
|---|
| 49 | | - size_t total_size, loff_t readahead_size) |
|---|
| 48 | +ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, |
|---|
| 49 | + loff_t *offset, struct iov_iter *iter, size_t total_size, |
|---|
| 50 | + loff_t readahead_size, struct orangefs_write_range *wr, |
|---|
| 51 | + int *index_return, struct file *file) |
|---|
| 50 | 52 | { |
|---|
| 51 | 53 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
|---|
| 52 | 54 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; |
|---|
| 53 | 55 | struct orangefs_kernel_op_s *new_op = NULL; |
|---|
| 54 | | - int buffer_index = -1; |
|---|
| 56 | + int buffer_index; |
|---|
| 55 | 57 | ssize_t ret; |
|---|
| 58 | + size_t copy_amount; |
|---|
| 59 | + int open_for_read; |
|---|
| 60 | + int open_for_write; |
|---|
| 56 | 61 | |
|---|
| 57 | 62 | new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); |
|---|
| 58 | 63 | if (!new_op) |
|---|
| .. | .. |
|---|
| 84 | 89 | new_op->upcall.req.io.buf_index = buffer_index; |
|---|
| 85 | 90 | new_op->upcall.req.io.count = total_size; |
|---|
| 86 | 91 | new_op->upcall.req.io.offset = *offset; |
|---|
| 92 | + if (type == ORANGEFS_IO_WRITE && wr) { |
|---|
| 93 | + new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid); |
|---|
| 94 | + new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid); |
|---|
| 95 | + } |
|---|
| 96 | + /* |
|---|
| 97 | + * Orangefs has no open, and orangefs checks file permissions |
|---|
| 98 | + * on each file access. Posix requires that file permissions |
|---|
| 99 | + * be checked on open and nowhere else. Orangefs-through-the-kernel |
|---|
| 100 | + * needs to seem posix compliant. |
|---|
| 101 | + * |
|---|
| 102 | + * The VFS opens files, even if the filesystem provides no |
|---|
| 103 | + * method. We can see if a file was successfully opened for |
|---|
| 104 | + * read and/or for write by looking at file->f_mode. |
|---|
| 105 | + * |
|---|
| 106 | + * When writes are flowing from the page cache, file is no |
|---|
| 107 | + * longer available. We can trust the VFS to have checked |
|---|
| 108 | + * file->f_mode before writing to the page cache. |
|---|
| 109 | + * |
|---|
| 110 | + * The mode of a file might change between when it is opened |
|---|
| 111 | + * and IO commences, or it might be created with an arbitrary mode. |
|---|
| 112 | + * |
|---|
| 113 | + * We'll make sure we don't hit EACCES during the IO stage by |
|---|
| 114 | + * using UID 0. Some of the time we have access without changing |
|---|
| 115 | + * to UID 0 - how to check? |
|---|
| 116 | + */ |
|---|
| 117 | + if (file) { |
|---|
| 118 | + open_for_write = file->f_mode & FMODE_WRITE; |
|---|
| 119 | + open_for_read = file->f_mode & FMODE_READ; |
|---|
| 120 | + } else { |
|---|
| 121 | + open_for_write = 1; |
|---|
| 122 | + open_for_read = 0; /* not relevant? */ |
|---|
| 123 | + } |
|---|
| 124 | + if ((type == ORANGEFS_IO_WRITE) && open_for_write) |
|---|
| 125 | + new_op->upcall.uid = 0; |
|---|
| 126 | + if ((type == ORANGEFS_IO_READ) && open_for_read) |
|---|
| 127 | + new_op->upcall.uid = 0; |
|---|
| 87 | 128 | |
|---|
| 88 | 129 | gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 89 | 130 | "%s(%pU): offset: %llu total_size: %zd\n", |
|---|
| .. | .. |
|---|
| 128 | 169 | */ |
|---|
| 129 | 170 | if (ret == -EAGAIN && op_state_purged(new_op)) { |
|---|
| 130 | 171 | orangefs_bufmap_put(buffer_index); |
|---|
| 131 | | - buffer_index = -1; |
|---|
| 132 | 172 | if (type == ORANGEFS_IO_WRITE) |
|---|
| 133 | 173 | iov_iter_revert(iter, total_size); |
|---|
| 134 | 174 | gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| .. | .. |
|---|
| 168 | 208 | * trigger the write. |
|---|
| 169 | 209 | */ |
|---|
| 170 | 210 | case OP_VFS_STATE_INPROGR: |
|---|
| 171 | | - ret = total_size; |
|---|
| 211 | + if (type == ORANGEFS_IO_READ) |
|---|
| 212 | + ret = -EINTR; |
|---|
| 213 | + else |
|---|
| 214 | + ret = total_size; |
|---|
| 172 | 215 | break; |
|---|
| 173 | 216 | default: |
|---|
| 174 | 217 | gossip_err("%s: unexpected op state :%d:.\n", |
|---|
| .. | .. |
|---|
| 204 | 247 | * can further be kernel-space or user-space addresses. |
|---|
| 205 | 248 | * or it can be pointers to struct page's |
|---|
| 206 | 249 | */ |
|---|
| 250 | + |
|---|
| 251 | + /* |
|---|
| 252 | + * When reading, readahead_size will only be zero when |
|---|
| 253 | + * we're doing O_DIRECT, otherwise we got here from |
|---|
| 254 | + * orangefs_readpage. |
|---|
| 255 | + * |
|---|
| 256 | + * If we got here from orangefs_readpage we want to |
|---|
| 257 | + * copy either a page or the whole file into the io |
|---|
| 258 | + * vector, whichever is smaller. |
|---|
| 259 | + */ |
|---|
| 260 | + if (readahead_size) |
|---|
| 261 | + copy_amount = |
|---|
| 262 | + min(new_op->downcall.resp.io.amt_complete, |
|---|
| 263 | + (__s64)PAGE_SIZE); |
|---|
| 264 | + else |
|---|
| 265 | + copy_amount = new_op->downcall.resp.io.amt_complete; |
|---|
| 266 | + |
|---|
| 207 | 267 | ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, |
|---|
| 208 | | - new_op->downcall.resp.io.amt_complete); |
|---|
| 268 | + copy_amount); |
|---|
| 209 | 269 | if (ret < 0) { |
|---|
| 210 | 270 | gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", |
|---|
| 211 | 271 | __func__, (long)ret); |
|---|
| .. | .. |
|---|
| 223 | 283 | |
|---|
| 224 | 284 | out: |
|---|
| 225 | 285 | if (buffer_index >= 0) { |
|---|
| 226 | | - orangefs_bufmap_put(buffer_index); |
|---|
| 227 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 228 | | - "%s(%pU): PUT buffer_index %d\n", |
|---|
| 229 | | - __func__, handle, buffer_index); |
|---|
| 230 | | - buffer_index = -1; |
|---|
| 286 | + if ((readahead_size) && (type == ORANGEFS_IO_READ)) { |
|---|
| 287 | + /* readpage */ |
|---|
| 288 | + *index_return = buffer_index; |
|---|
| 289 | + gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 290 | + "%s: hold on to buffer_index :%d:\n", |
|---|
| 291 | + __func__, buffer_index); |
|---|
| 292 | + } else { |
|---|
| 293 | + /* O_DIRECT */ |
|---|
| 294 | + orangefs_bufmap_put(buffer_index); |
|---|
| 295 | + gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 296 | + "%s(%pU): PUT buffer_index %d\n", |
|---|
| 297 | + __func__, handle, buffer_index); |
|---|
| 298 | + } |
|---|
| 231 | 299 | } |
|---|
| 232 | 300 | op_release(new_op); |
|---|
| 233 | 301 | return ret; |
|---|
| 234 | 302 | } |
|---|
| 235 | 303 | |
|---|
| 236 | | -/* |
|---|
| 237 | | - * Common entry point for read/write/readv/writev |
|---|
| 238 | | - * This function will dispatch it to either the direct I/O |
|---|
| 239 | | - * or buffered I/O path depending on the mount options and/or |
|---|
| 240 | | - * augmented/extended metadata attached to the file. |
|---|
| 241 | | - * Note: File extended attributes override any mount options. |
|---|
| 242 | | - */ |
|---|
| 243 | | -static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, |
|---|
| 244 | | - loff_t *offset, struct iov_iter *iter) |
|---|
| 304 | +int orangefs_revalidate_mapping(struct inode *inode) |
|---|
| 245 | 305 | { |
|---|
| 246 | | - struct inode *inode = file->f_mapping->host; |
|---|
| 247 | 306 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
|---|
| 248 | | - struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; |
|---|
| 249 | | - size_t count = iov_iter_count(iter); |
|---|
| 250 | | - ssize_t total_count = 0; |
|---|
| 251 | | - ssize_t ret = -EINVAL; |
|---|
| 307 | + struct address_space *mapping = inode->i_mapping; |
|---|
| 308 | + unsigned long *bitlock = &orangefs_inode->bitlock; |
|---|
| 309 | + int ret; |
|---|
| 252 | 310 | |
|---|
| 253 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 254 | | - "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", |
|---|
| 255 | | - __func__, |
|---|
| 256 | | - handle, |
|---|
| 257 | | - (int)count); |
|---|
| 258 | | - |
|---|
| 259 | | - if (type == ORANGEFS_IO_WRITE) { |
|---|
| 260 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 261 | | - "%s(%pU): proceeding with offset : %llu, " |
|---|
| 262 | | - "size %d\n", |
|---|
| 263 | | - __func__, |
|---|
| 264 | | - handle, |
|---|
| 265 | | - llu(*offset), |
|---|
| 266 | | - (int)count); |
|---|
| 267 | | - } |
|---|
| 268 | | - |
|---|
| 269 | | - if (count == 0) { |
|---|
| 270 | | - ret = 0; |
|---|
| 271 | | - goto out; |
|---|
| 272 | | - } |
|---|
| 273 | | - |
|---|
| 274 | | - while (iov_iter_count(iter)) { |
|---|
| 275 | | - size_t each_count = iov_iter_count(iter); |
|---|
| 276 | | - size_t amt_complete; |
|---|
| 277 | | - |
|---|
| 278 | | - /* how much to transfer in this loop iteration */ |
|---|
| 279 | | - if (each_count > orangefs_bufmap_size_query()) |
|---|
| 280 | | - each_count = orangefs_bufmap_size_query(); |
|---|
| 281 | | - |
|---|
| 282 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 283 | | - "%s(%pU): size of each_count(%d)\n", |
|---|
| 284 | | - __func__, |
|---|
| 285 | | - handle, |
|---|
| 286 | | - (int)each_count); |
|---|
| 287 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 288 | | - "%s(%pU): BEFORE wait_for_io: offset is %d\n", |
|---|
| 289 | | - __func__, |
|---|
| 290 | | - handle, |
|---|
| 291 | | - (int)*offset); |
|---|
| 292 | | - |
|---|
| 293 | | - ret = wait_for_direct_io(type, inode, offset, iter, |
|---|
| 294 | | - each_count, 0); |
|---|
| 295 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 296 | | - "%s(%pU): return from wait_for_io:%d\n", |
|---|
| 297 | | - __func__, |
|---|
| 298 | | - handle, |
|---|
| 299 | | - (int)ret); |
|---|
| 300 | | - |
|---|
| 301 | | - if (ret < 0) |
|---|
| 302 | | - goto out; |
|---|
| 303 | | - |
|---|
| 304 | | - *offset += ret; |
|---|
| 305 | | - total_count += ret; |
|---|
| 306 | | - amt_complete = ret; |
|---|
| 307 | | - |
|---|
| 308 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 309 | | - "%s(%pU): AFTER wait_for_io: offset is %d\n", |
|---|
| 310 | | - __func__, |
|---|
| 311 | | - handle, |
|---|
| 312 | | - (int)*offset); |
|---|
| 313 | | - |
|---|
| 314 | | - /* |
|---|
| 315 | | - * if we got a short I/O operations, |
|---|
| 316 | | - * fall out and return what we got so far |
|---|
| 317 | | - */ |
|---|
| 318 | | - if (amt_complete < each_count) |
|---|
| 311 | + while (1) { |
|---|
| 312 | + ret = wait_on_bit(bitlock, 1, TASK_KILLABLE); |
|---|
| 313 | + if (ret) |
|---|
| 314 | + return ret; |
|---|
| 315 | + spin_lock(&inode->i_lock); |
|---|
| 316 | + if (test_bit(1, bitlock)) { |
|---|
| 317 | + spin_unlock(&inode->i_lock); |
|---|
| 318 | + continue; |
|---|
| 319 | + } |
|---|
| 320 | + if (!time_before(jiffies, orangefs_inode->mapping_time)) |
|---|
| 319 | 321 | break; |
|---|
| 320 | | - } /*end while */ |
|---|
| 322 | + spin_unlock(&inode->i_lock); |
|---|
| 323 | + return 0; |
|---|
| 324 | + } |
|---|
| 321 | 325 | |
|---|
| 326 | + set_bit(1, bitlock); |
|---|
| 327 | + smp_wmb(); |
|---|
| 328 | + spin_unlock(&inode->i_lock); |
|---|
| 329 | + |
|---|
| 330 | + unmap_mapping_range(mapping, 0, 0, 0); |
|---|
| 331 | + ret = filemap_write_and_wait(mapping); |
|---|
| 332 | + if (!ret) |
|---|
| 333 | + ret = invalidate_inode_pages2(mapping); |
|---|
| 334 | + |
|---|
| 335 | + orangefs_inode->mapping_time = jiffies + |
|---|
| 336 | + orangefs_cache_timeout_msecs*HZ/1000; |
|---|
| 337 | + |
|---|
| 338 | + clear_bit(1, bitlock); |
|---|
| 339 | + smp_mb__after_atomic(); |
|---|
| 340 | + wake_up_bit(bitlock, 1); |
|---|
| 341 | + |
|---|
| 342 | + return ret; |
|---|
| 343 | +} |
|---|
| 344 | + |
|---|
| 345 | +static ssize_t orangefs_file_read_iter(struct kiocb *iocb, |
|---|
| 346 | + struct iov_iter *iter) |
|---|
| 347 | +{ |
|---|
| 348 | + int ret; |
|---|
| 349 | + orangefs_stats.reads++; |
|---|
| 350 | + |
|---|
| 351 | + down_read(&file_inode(iocb->ki_filp)->i_rwsem); |
|---|
| 352 | + ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
|---|
| 353 | + if (ret) |
|---|
| 354 | + goto out; |
|---|
| 355 | + |
|---|
| 356 | + ret = generic_file_read_iter(iocb, iter); |
|---|
| 322 | 357 | out: |
|---|
| 323 | | - if (total_count > 0) |
|---|
| 324 | | - ret = total_count; |
|---|
| 325 | | - if (ret > 0) { |
|---|
| 326 | | - if (type == ORANGEFS_IO_READ) { |
|---|
| 327 | | - file_accessed(file); |
|---|
| 328 | | - } else { |
|---|
| 329 | | - file_update_time(file); |
|---|
| 330 | | - /* |
|---|
| 331 | | - * Must invalidate to ensure write loop doesn't |
|---|
| 332 | | - * prevent kernel from reading updated |
|---|
| 333 | | - * attribute. Size probably changed because of |
|---|
| 334 | | - * the write, and other clients could update |
|---|
| 335 | | - * any other attribute. |
|---|
| 336 | | - */ |
|---|
| 337 | | - orangefs_inode->getattr_time = jiffies - 1; |
|---|
| 338 | | - } |
|---|
| 339 | | - } |
|---|
| 340 | | - |
|---|
| 341 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 342 | | - "%s(%pU): Value(%d) returned.\n", |
|---|
| 343 | | - __func__, |
|---|
| 344 | | - handle, |
|---|
| 345 | | - (int)ret); |
|---|
| 346 | | - |
|---|
| 358 | + up_read(&file_inode(iocb->ki_filp)->i_rwsem); |
|---|
| 347 | 359 | return ret; |
|---|
| 348 | 360 | } |
|---|
| 349 | 361 | |
|---|
| 350 | | -/* |
|---|
| 351 | | - * Read data from a specified offset in a file (referenced by inode). |
|---|
| 352 | | - * Data may be placed either in a user or kernel buffer. |
|---|
| 353 | | - */ |
|---|
| 354 | | -ssize_t orangefs_inode_read(struct inode *inode, |
|---|
| 355 | | - struct iov_iter *iter, |
|---|
| 356 | | - loff_t *offset, |
|---|
| 357 | | - loff_t readahead_size) |
|---|
| 362 | +static ssize_t orangefs_file_write_iter(struct kiocb *iocb, |
|---|
| 363 | + struct iov_iter *iter) |
|---|
| 358 | 364 | { |
|---|
| 359 | | - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
|---|
| 360 | | - size_t count = iov_iter_count(iter); |
|---|
| 361 | | - size_t bufmap_size; |
|---|
| 362 | | - ssize_t ret = -EINVAL; |
|---|
| 363 | | - |
|---|
| 364 | | - orangefs_stats.reads++; |
|---|
| 365 | | - |
|---|
| 366 | | - bufmap_size = orangefs_bufmap_size_query(); |
|---|
| 367 | | - if (count > bufmap_size) { |
|---|
| 368 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 369 | | - "%s: count is too large (%zd/%zd)!\n", |
|---|
| 370 | | - __func__, count, bufmap_size); |
|---|
| 371 | | - return -EINVAL; |
|---|
| 372 | | - } |
|---|
| 373 | | - |
|---|
| 374 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 375 | | - "%s(%pU) %zd@%llu\n", |
|---|
| 376 | | - __func__, |
|---|
| 377 | | - &orangefs_inode->refn.khandle, |
|---|
| 378 | | - count, |
|---|
| 379 | | - llu(*offset)); |
|---|
| 380 | | - |
|---|
| 381 | | - ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter, |
|---|
| 382 | | - count, readahead_size); |
|---|
| 383 | | - if (ret > 0) |
|---|
| 384 | | - *offset += ret; |
|---|
| 385 | | - |
|---|
| 386 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 387 | | - "%s(%pU): Value(%zd) returned.\n", |
|---|
| 388 | | - __func__, |
|---|
| 389 | | - &orangefs_inode->refn.khandle, |
|---|
| 390 | | - ret); |
|---|
| 391 | | - |
|---|
| 392 | | - return ret; |
|---|
| 393 | | -} |
|---|
| 394 | | - |
|---|
| 395 | | -static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
|---|
| 396 | | -{ |
|---|
| 397 | | - struct file *file = iocb->ki_filp; |
|---|
| 398 | | - loff_t pos = iocb->ki_pos; |
|---|
| 399 | | - ssize_t rc = 0; |
|---|
| 400 | | - |
|---|
| 401 | | - BUG_ON(iocb->private); |
|---|
| 402 | | - |
|---|
| 403 | | - gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n"); |
|---|
| 404 | | - |
|---|
| 405 | | - orangefs_stats.reads++; |
|---|
| 406 | | - |
|---|
| 407 | | - rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter); |
|---|
| 408 | | - iocb->ki_pos = pos; |
|---|
| 409 | | - |
|---|
| 410 | | - return rc; |
|---|
| 411 | | -} |
|---|
| 412 | | - |
|---|
| 413 | | -static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) |
|---|
| 414 | | -{ |
|---|
| 415 | | - struct file *file = iocb->ki_filp; |
|---|
| 416 | | - loff_t pos; |
|---|
| 417 | | - ssize_t rc; |
|---|
| 418 | | - |
|---|
| 419 | | - BUG_ON(iocb->private); |
|---|
| 420 | | - |
|---|
| 421 | | - gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n"); |
|---|
| 422 | | - |
|---|
| 423 | | - inode_lock(file->f_mapping->host); |
|---|
| 424 | | - |
|---|
| 425 | | - /* Make sure generic_write_checks sees an up to date inode size. */ |
|---|
| 426 | | - if (file->f_flags & O_APPEND) { |
|---|
| 427 | | - rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, |
|---|
| 428 | | - STATX_SIZE); |
|---|
| 429 | | - if (rc == -ESTALE) |
|---|
| 430 | | - rc = -EIO; |
|---|
| 431 | | - if (rc) { |
|---|
| 432 | | - gossip_err("%s: orangefs_inode_getattr failed, " |
|---|
| 433 | | - "rc:%zd:.\n", __func__, rc); |
|---|
| 434 | | - goto out; |
|---|
| 435 | | - } |
|---|
| 436 | | - } |
|---|
| 437 | | - |
|---|
| 438 | | - rc = generic_write_checks(iocb, iter); |
|---|
| 439 | | - |
|---|
| 440 | | - if (rc <= 0) { |
|---|
| 441 | | - gossip_err("%s: generic_write_checks failed, rc:%zd:.\n", |
|---|
| 442 | | - __func__, rc); |
|---|
| 443 | | - goto out; |
|---|
| 444 | | - } |
|---|
| 445 | | - |
|---|
| 446 | | - /* |
|---|
| 447 | | - * if we are appending, generic_write_checks would have updated |
|---|
| 448 | | - * pos to the end of the file, so we will wait till now to set |
|---|
| 449 | | - * pos... |
|---|
| 450 | | - */ |
|---|
| 451 | | - pos = iocb->ki_pos; |
|---|
| 452 | | - |
|---|
| 453 | | - rc = do_readv_writev(ORANGEFS_IO_WRITE, |
|---|
| 454 | | - file, |
|---|
| 455 | | - &pos, |
|---|
| 456 | | - iter); |
|---|
| 457 | | - if (rc < 0) { |
|---|
| 458 | | - gossip_err("%s: do_readv_writev failed, rc:%zd:.\n", |
|---|
| 459 | | - __func__, rc); |
|---|
| 460 | | - goto out; |
|---|
| 461 | | - } |
|---|
| 462 | | - |
|---|
| 463 | | - iocb->ki_pos = pos; |
|---|
| 365 | + int ret; |
|---|
| 464 | 366 | orangefs_stats.writes++; |
|---|
| 465 | 367 | |
|---|
| 466 | | -out: |
|---|
| 368 | + if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) { |
|---|
| 369 | + ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
|---|
| 370 | + if (ret) |
|---|
| 371 | + return ret; |
|---|
| 372 | + } |
|---|
| 467 | 373 | |
|---|
| 468 | | - inode_unlock(file->f_mapping->host); |
|---|
| 469 | | - return rc; |
|---|
| 374 | + ret = generic_file_write_iter(iocb, iter); |
|---|
| 375 | + return ret; |
|---|
| 376 | +} |
|---|
| 377 | + |
|---|
| 378 | +static int orangefs_getflags(struct inode *inode, unsigned long *uval) |
|---|
| 379 | +{ |
|---|
| 380 | + __u64 val = 0; |
|---|
| 381 | + int ret; |
|---|
| 382 | + |
|---|
| 383 | + ret = orangefs_inode_getxattr(inode, |
|---|
| 384 | + "user.pvfs2.meta_hint", |
|---|
| 385 | + &val, sizeof(val)); |
|---|
| 386 | + if (ret < 0 && ret != -ENODATA) |
|---|
| 387 | + return ret; |
|---|
| 388 | + else if (ret == -ENODATA) |
|---|
| 389 | + val = 0; |
|---|
| 390 | + *uval = val; |
|---|
| 391 | + return 0; |
|---|
| 470 | 392 | } |
|---|
| 471 | 393 | |
|---|
| 472 | 394 | /* |
|---|
| .. | .. |
|---|
| 474 | 396 | */ |
|---|
| 475 | 397 | static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
|---|
| 476 | 398 | { |
|---|
| 399 | + struct inode *inode = file_inode(file); |
|---|
| 477 | 400 | int ret = -ENOTTY; |
|---|
| 478 | 401 | __u64 val = 0; |
|---|
| 479 | 402 | unsigned long uval; |
|---|
| .. | .. |
|---|
| 487 | 410 | * and append flags |
|---|
| 488 | 411 | */ |
|---|
| 489 | 412 | if (cmd == FS_IOC_GETFLAGS) { |
|---|
| 490 | | - val = 0; |
|---|
| 491 | | - ret = orangefs_inode_getxattr(file_inode(file), |
|---|
| 492 | | - "user.pvfs2.meta_hint", |
|---|
| 493 | | - &val, sizeof(val)); |
|---|
| 494 | | - if (ret < 0 && ret != -ENODATA) |
|---|
| 413 | + ret = orangefs_getflags(inode, &uval); |
|---|
| 414 | + if (ret) |
|---|
| 495 | 415 | return ret; |
|---|
| 496 | | - else if (ret == -ENODATA) |
|---|
| 497 | | - val = 0; |
|---|
| 498 | | - uval = val; |
|---|
| 499 | 416 | gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 500 | 417 | "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", |
|---|
| 501 | 418 | (unsigned long long)uval); |
|---|
| 502 | 419 | return put_user(uval, (int __user *)arg); |
|---|
| 503 | 420 | } else if (cmd == FS_IOC_SETFLAGS) { |
|---|
| 421 | + unsigned long old_uval; |
|---|
| 422 | + |
|---|
| 504 | 423 | ret = 0; |
|---|
| 505 | 424 | if (get_user(uval, (int __user *)arg)) |
|---|
| 506 | 425 | return -EFAULT; |
|---|
| .. | .. |
|---|
| 516 | 435 | gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); |
|---|
| 517 | 436 | return -EINVAL; |
|---|
| 518 | 437 | } |
|---|
| 438 | + ret = orangefs_getflags(inode, &old_uval); |
|---|
| 439 | + if (ret) |
|---|
| 440 | + return ret; |
|---|
| 441 | + ret = vfs_ioc_setflags_prepare(inode, old_uval, uval); |
|---|
| 442 | + if (ret) |
|---|
| 443 | + return ret; |
|---|
| 519 | 444 | val = uval; |
|---|
| 520 | 445 | gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 521 | 446 | "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", |
|---|
| 522 | 447 | (unsigned long long)val); |
|---|
| 523 | | - ret = orangefs_inode_setxattr(file_inode(file), |
|---|
| 448 | + ret = orangefs_inode_setxattr(inode, |
|---|
| 524 | 449 | "user.pvfs2.meta_hint", |
|---|
| 525 | 450 | &val, sizeof(val), 0); |
|---|
| 526 | 451 | } |
|---|
| .. | .. |
|---|
| 532 | 457 | { |
|---|
| 533 | 458 | struct file *file = vmf->vma->vm_file; |
|---|
| 534 | 459 | int ret; |
|---|
| 535 | | - |
|---|
| 536 | | - ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, |
|---|
| 537 | | - STATX_SIZE); |
|---|
| 460 | + ret = orangefs_inode_getattr(file->f_mapping->host, |
|---|
| 461 | + ORANGEFS_GETATTR_SIZE); |
|---|
| 538 | 462 | if (ret == -ESTALE) |
|---|
| 539 | 463 | ret = -EIO; |
|---|
| 540 | 464 | if (ret) { |
|---|
| 541 | | - gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", |
|---|
| 542 | | - __func__, ret); |
|---|
| 465 | + gossip_err("%s: orangefs_inode_getattr failed, " |
|---|
| 466 | + "ret:%d:.\n", __func__, ret); |
|---|
| 543 | 467 | return VM_FAULT_SIGBUS; |
|---|
| 544 | 468 | } |
|---|
| 545 | 469 | return filemap_fault(vmf); |
|---|
| .. | .. |
|---|
| 548 | 472 | static const struct vm_operations_struct orangefs_file_vm_ops = { |
|---|
| 549 | 473 | .fault = orangefs_fault, |
|---|
| 550 | 474 | .map_pages = filemap_map_pages, |
|---|
| 551 | | - .page_mkwrite = filemap_page_mkwrite, |
|---|
| 475 | + .page_mkwrite = orangefs_page_mkwrite, |
|---|
| 552 | 476 | }; |
|---|
| 553 | 477 | |
|---|
| 554 | 478 | /* |
|---|
| .. | .. |
|---|
| 556 | 480 | */ |
|---|
| 557 | 481 | static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) |
|---|
| 558 | 482 | { |
|---|
| 483 | + int ret; |
|---|
| 484 | + |
|---|
| 485 | + ret = orangefs_revalidate_mapping(file_inode(file)); |
|---|
| 486 | + if (ret) |
|---|
| 487 | + return ret; |
|---|
| 488 | + |
|---|
| 559 | 489 | gossip_debug(GOSSIP_FILE_DEBUG, |
|---|
| 560 | 490 | "orangefs_file_mmap: called on %s\n", |
|---|
| 561 | 491 | (file ? |
|---|
| 562 | 492 | (char *)file->f_path.dentry->d_name.name : |
|---|
| 563 | 493 | (char *)"Unknown")); |
|---|
| 564 | | - |
|---|
| 565 | | - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) |
|---|
| 566 | | - return -EINVAL; |
|---|
| 567 | 494 | |
|---|
| 568 | 495 | /* set the sequential readahead hint */ |
|---|
| 569 | 496 | vma->vm_flags |= VM_SEQ_READ; |
|---|
| .. | .. |
|---|
| 604 | 531 | gossip_debug(GOSSIP_INODE_DEBUG, |
|---|
| 605 | 532 | "flush_racache finished\n"); |
|---|
| 606 | 533 | } |
|---|
| 607 | | - truncate_inode_pages(file_inode(file)->i_mapping, |
|---|
| 608 | | - 0); |
|---|
| 534 | + |
|---|
| 609 | 535 | } |
|---|
| 610 | 536 | return 0; |
|---|
| 611 | 537 | } |
|---|
| .. | .. |
|---|
| 622 | 548 | struct orangefs_inode_s *orangefs_inode = |
|---|
| 623 | 549 | ORANGEFS_I(file_inode(file)); |
|---|
| 624 | 550 | struct orangefs_kernel_op_s *new_op = NULL; |
|---|
| 551 | + |
|---|
| 552 | + ret = filemap_write_and_wait_range(file_inode(file)->i_mapping, |
|---|
| 553 | + start, end); |
|---|
| 554 | + if (ret < 0) |
|---|
| 555 | + return ret; |
|---|
| 625 | 556 | |
|---|
| 626 | 557 | new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); |
|---|
| 627 | 558 | if (!new_op) |
|---|
| .. | .. |
|---|
| 644 | 575 | * Change the file pointer position for an instance of an open file. |
|---|
| 645 | 576 | * |
|---|
| 646 | 577 | * \note If .llseek is overridden, we must acquire lock as described in |
|---|
| 647 | | - * Documentation/filesystems/Locking. |
|---|
| 578 | + * Documentation/filesystems/locking.rst. |
|---|
| 648 | 579 | * |
|---|
| 649 | 580 | * Future upgrade could support SEEK_DATA and SEEK_HOLE but would |
|---|
| 650 | 581 | * require much changes to the FS |
|---|
| .. | .. |
|---|
| 660 | 591 | * NOTE: We are only interested in file size here, |
|---|
| 661 | 592 | * so we set mask accordingly. |
|---|
| 662 | 593 | */ |
|---|
| 663 | | - ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, |
|---|
| 664 | | - STATX_SIZE); |
|---|
| 594 | + ret = orangefs_inode_getattr(file->f_mapping->host, |
|---|
| 595 | + ORANGEFS_GETATTR_SIZE); |
|---|
| 665 | 596 | if (ret == -ESTALE) |
|---|
| 666 | 597 | ret = -EIO; |
|---|
| 667 | 598 | if (ret) { |
|---|
| .. | .. |
|---|
| 704 | 635 | return rc; |
|---|
| 705 | 636 | } |
|---|
| 706 | 637 | |
|---|
| 638 | +static int orangefs_flush(struct file *file, fl_owner_t id) |
|---|
| 639 | +{ |
|---|
| 640 | + /* |
|---|
| 641 | + * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the |
|---|
| 642 | + * service_operation in orangefs_fsync. |
|---|
| 643 | + * |
|---|
| 644 | + * Do not send fsync to OrangeFS server on a close. Do send fsync |
|---|
| 645 | + * on an explicit fsync call. This duplicates historical OrangeFS |
|---|
| 646 | + * behavior. |
|---|
| 647 | + */ |
|---|
| 648 | + int r; |
|---|
| 649 | + |
|---|
| 650 | + r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); |
|---|
| 651 | + if (r > 0) |
|---|
| 652 | + return 0; |
|---|
| 653 | + else |
|---|
| 654 | + return r; |
|---|
| 655 | +} |
|---|
| 656 | + |
|---|
| 707 | 657 | /** ORANGEFS implementation of VFS file operations */ |
|---|
| 708 | 658 | const struct file_operations orangefs_file_operations = { |
|---|
| 709 | 659 | .llseek = orangefs_file_llseek, |
|---|
| .. | .. |
|---|
| 713 | 663 | .unlocked_ioctl = orangefs_ioctl, |
|---|
| 714 | 664 | .mmap = orangefs_file_mmap, |
|---|
| 715 | 665 | .open = generic_file_open, |
|---|
| 666 | + .flush = orangefs_flush, |
|---|
| 716 | 667 | .release = orangefs_file_release, |
|---|
| 717 | 668 | .fsync = orangefs_fsync, |
|---|
| 718 | 669 | }; |
|---|