.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
---|
2 | 2 | /* |
---|
3 | 3 | * (C) 2001 Clemson University and The University of Chicago |
---|
| 4 | + * Copyright 2018 Omnibond Systems, L.L.C. |
---|
4 | 5 | * |
---|
5 | 6 | * See COPYING in top-level directory. |
---|
6 | 7 | */ |
---|
.. | .. |
---|
44 | 45 | /* |
---|
45 | 46 | * Post and wait for the I/O upcall to finish |
---|
46 | 47 | */ |
---|
47 | | -static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, |
---|
48 | | - loff_t *offset, struct iov_iter *iter, |
---|
49 | | - size_t total_size, loff_t readahead_size) |
---|
| 48 | +ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, |
---|
| 49 | + loff_t *offset, struct iov_iter *iter, size_t total_size, |
---|
| 50 | + loff_t readahead_size, struct orangefs_write_range *wr, |
---|
| 51 | + int *index_return, struct file *file) |
---|
50 | 52 | { |
---|
51 | 53 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
---|
52 | 54 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; |
---|
53 | 55 | struct orangefs_kernel_op_s *new_op = NULL; |
---|
54 | | - int buffer_index = -1; |
---|
| 56 | + int buffer_index; |
---|
55 | 57 | ssize_t ret; |
---|
| 58 | + size_t copy_amount; |
---|
| 59 | + int open_for_read; |
---|
| 60 | + int open_for_write; |
---|
56 | 61 | |
---|
57 | 62 | new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); |
---|
58 | 63 | if (!new_op) |
---|
.. | .. |
---|
84 | 89 | new_op->upcall.req.io.buf_index = buffer_index; |
---|
85 | 90 | new_op->upcall.req.io.count = total_size; |
---|
86 | 91 | new_op->upcall.req.io.offset = *offset; |
---|
| 92 | + if (type == ORANGEFS_IO_WRITE && wr) { |
---|
| 93 | + new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid); |
---|
| 94 | + new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid); |
---|
| 95 | + } |
---|
| 96 | + /* |
---|
| 97 | + * Orangefs has no open, and orangefs checks file permissions |
---|
| 98 | + * on each file access. Posix requires that file permissions |
---|
| 99 | + * be checked on open and nowhere else. Orangefs-through-the-kernel |
---|
| 100 | + * needs to seem posix compliant. |
---|
| 101 | + * |
---|
| 102 | + * The VFS opens files, even if the filesystem provides no |
---|
| 103 | + * method. We can see if a file was successfully opened for |
---|
| 104 | + * read and/or for write by looking at file->f_mode. |
---|
| 105 | + * |
---|
| 106 | + * When writes are flowing from the page cache, file is no |
---|
| 107 | + * longer available. We can trust the VFS to have checked |
---|
| 108 | + * file->f_mode before writing to the page cache. |
---|
| 109 | + * |
---|
| 110 | + * The mode of a file might change between when it is opened |
---|
| 111 | + * and IO commences, or it might be created with an arbitrary mode. |
---|
| 112 | + * |
---|
| 113 | + * We'll make sure we don't hit EACCES during the IO stage by |
---|
| 114 | + * using UID 0. Some of the time we have access without changing |
---|
| 115 | + * to UID 0 - how to check? |
---|
| 116 | + */ |
---|
| 117 | + if (file) { |
---|
| 118 | + open_for_write = file->f_mode & FMODE_WRITE; |
---|
| 119 | + open_for_read = file->f_mode & FMODE_READ; |
---|
| 120 | + } else { |
---|
| 121 | + open_for_write = 1; |
---|
| 122 | + open_for_read = 0; /* not relevant? */ |
---|
| 123 | + } |
---|
| 124 | + if ((type == ORANGEFS_IO_WRITE) && open_for_write) |
---|
| 125 | + new_op->upcall.uid = 0; |
---|
| 126 | + if ((type == ORANGEFS_IO_READ) && open_for_read) |
---|
| 127 | + new_op->upcall.uid = 0; |
---|
87 | 128 | |
---|
88 | 129 | gossip_debug(GOSSIP_FILE_DEBUG, |
---|
89 | 130 | "%s(%pU): offset: %llu total_size: %zd\n", |
---|
.. | .. |
---|
128 | 169 | */ |
---|
129 | 170 | if (ret == -EAGAIN && op_state_purged(new_op)) { |
---|
130 | 171 | orangefs_bufmap_put(buffer_index); |
---|
131 | | - buffer_index = -1; |
---|
132 | 172 | if (type == ORANGEFS_IO_WRITE) |
---|
133 | 173 | iov_iter_revert(iter, total_size); |
---|
134 | 174 | gossip_debug(GOSSIP_FILE_DEBUG, |
---|
.. | .. |
---|
168 | 208 | * trigger the write. |
---|
169 | 209 | */ |
---|
170 | 210 | case OP_VFS_STATE_INPROGR: |
---|
171 | | - ret = total_size; |
---|
| 211 | + if (type == ORANGEFS_IO_READ) |
---|
| 212 | + ret = -EINTR; |
---|
| 213 | + else |
---|
| 214 | + ret = total_size; |
---|
172 | 215 | break; |
---|
173 | 216 | default: |
---|
174 | 217 | gossip_err("%s: unexpected op state :%d:.\n", |
---|
.. | .. |
---|
204 | 247 | * can further be kernel-space or user-space addresses. |
---|
205 | 248 | * or it can be pointers to struct pages |
---|
206 | 249 | */ |
---|
| 250 | + |
---|
| 251 | + /* |
---|
| 252 | + * When reading, readahead_size will only be zero when |
---|
| 253 | + * we're doing O_DIRECT, otherwise we got here from |
---|
| 254 | + * orangefs_readpage. |
---|
| 255 | + * |
---|
| 256 | + * If we got here from orangefs_readpage we want to |
---|
| 257 | + * copy either a page or the whole file into the io |
---|
| 258 | + * vector, whichever is smaller. |
---|
| 259 | + */ |
---|
| 260 | + if (readahead_size) |
---|
| 261 | + copy_amount = |
---|
| 262 | + min(new_op->downcall.resp.io.amt_complete, |
---|
| 263 | + (__s64)PAGE_SIZE); |
---|
| 264 | + else |
---|
| 265 | + copy_amount = new_op->downcall.resp.io.amt_complete; |
---|
| 266 | + |
---|
207 | 267 | ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, |
---|
208 | | - new_op->downcall.resp.io.amt_complete); |
---|
| 268 | + copy_amount); |
---|
209 | 269 | if (ret < 0) { |
---|
210 | 270 | gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", |
---|
211 | 271 | __func__, (long)ret); |
---|
.. | .. |
---|
223 | 283 | |
---|
224 | 284 | out: |
---|
225 | 285 | if (buffer_index >= 0) { |
---|
226 | | - orangefs_bufmap_put(buffer_index); |
---|
227 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
228 | | - "%s(%pU): PUT buffer_index %d\n", |
---|
229 | | - __func__, handle, buffer_index); |
---|
230 | | - buffer_index = -1; |
---|
| 286 | + if ((readahead_size) && (type == ORANGEFS_IO_READ)) { |
---|
| 287 | + /* readpage */ |
---|
| 288 | + *index_return = buffer_index; |
---|
| 289 | + gossip_debug(GOSSIP_FILE_DEBUG, |
---|
| 290 | + "%s: hold on to buffer_index :%d:\n", |
---|
| 291 | + __func__, buffer_index); |
---|
| 292 | + } else { |
---|
| 293 | + /* O_DIRECT */ |
---|
| 294 | + orangefs_bufmap_put(buffer_index); |
---|
| 295 | + gossip_debug(GOSSIP_FILE_DEBUG, |
---|
| 296 | + "%s(%pU): PUT buffer_index %d\n", |
---|
| 297 | + __func__, handle, buffer_index); |
---|
| 298 | + } |
---|
231 | 299 | } |
---|
232 | 300 | op_release(new_op); |
---|
233 | 301 | return ret; |
---|
234 | 302 | } |
---|
235 | 303 | |
---|
236 | | -/* |
---|
237 | | - * Common entry point for read/write/readv/writev |
---|
238 | | - * This function will dispatch it to either the direct I/O |
---|
239 | | - * or buffered I/O path depending on the mount options and/or |
---|
240 | | - * augmented/extended metadata attached to the file. |
---|
241 | | - * Note: File extended attributes override any mount options. |
---|
242 | | - */ |
---|
243 | | -static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, |
---|
244 | | - loff_t *offset, struct iov_iter *iter) |
---|
| 304 | +int orangefs_revalidate_mapping(struct inode *inode) |
---|
245 | 305 | { |
---|
246 | | - struct inode *inode = file->f_mapping->host; |
---|
247 | 306 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
---|
248 | | - struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; |
---|
249 | | - size_t count = iov_iter_count(iter); |
---|
250 | | - ssize_t total_count = 0; |
---|
251 | | - ssize_t ret = -EINVAL; |
---|
| 307 | + struct address_space *mapping = inode->i_mapping; |
---|
| 308 | + unsigned long *bitlock = &orangefs_inode->bitlock; |
---|
| 309 | + int ret; |
---|
252 | 310 | |
---|
253 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
254 | | - "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", |
---|
255 | | - __func__, |
---|
256 | | - handle, |
---|
257 | | - (int)count); |
---|
258 | | - |
---|
259 | | - if (type == ORANGEFS_IO_WRITE) { |
---|
260 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
261 | | - "%s(%pU): proceeding with offset : %llu, " |
---|
262 | | - "size %d\n", |
---|
263 | | - __func__, |
---|
264 | | - handle, |
---|
265 | | - llu(*offset), |
---|
266 | | - (int)count); |
---|
267 | | - } |
---|
268 | | - |
---|
269 | | - if (count == 0) { |
---|
270 | | - ret = 0; |
---|
271 | | - goto out; |
---|
272 | | - } |
---|
273 | | - |
---|
274 | | - while (iov_iter_count(iter)) { |
---|
275 | | - size_t each_count = iov_iter_count(iter); |
---|
276 | | - size_t amt_complete; |
---|
277 | | - |
---|
278 | | - /* how much to transfer in this loop iteration */ |
---|
279 | | - if (each_count > orangefs_bufmap_size_query()) |
---|
280 | | - each_count = orangefs_bufmap_size_query(); |
---|
281 | | - |
---|
282 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
283 | | - "%s(%pU): size of each_count(%d)\n", |
---|
284 | | - __func__, |
---|
285 | | - handle, |
---|
286 | | - (int)each_count); |
---|
287 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
288 | | - "%s(%pU): BEFORE wait_for_io: offset is %d\n", |
---|
289 | | - __func__, |
---|
290 | | - handle, |
---|
291 | | - (int)*offset); |
---|
292 | | - |
---|
293 | | - ret = wait_for_direct_io(type, inode, offset, iter, |
---|
294 | | - each_count, 0); |
---|
295 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
296 | | - "%s(%pU): return from wait_for_io:%d\n", |
---|
297 | | - __func__, |
---|
298 | | - handle, |
---|
299 | | - (int)ret); |
---|
300 | | - |
---|
301 | | - if (ret < 0) |
---|
302 | | - goto out; |
---|
303 | | - |
---|
304 | | - *offset += ret; |
---|
305 | | - total_count += ret; |
---|
306 | | - amt_complete = ret; |
---|
307 | | - |
---|
308 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
309 | | - "%s(%pU): AFTER wait_for_io: offset is %d\n", |
---|
310 | | - __func__, |
---|
311 | | - handle, |
---|
312 | | - (int)*offset); |
---|
313 | | - |
---|
314 | | - /* |
---|
315 | | - * if we got a short I/O operations, |
---|
316 | | - * fall out and return what we got so far |
---|
317 | | - */ |
---|
318 | | - if (amt_complete < each_count) |
---|
| 311 | + while (1) { |
---|
| 312 | + ret = wait_on_bit(bitlock, 1, TASK_KILLABLE); |
---|
| 313 | + if (ret) |
---|
| 314 | + return ret; |
---|
| 315 | + spin_lock(&inode->i_lock); |
---|
| 316 | + if (test_bit(1, bitlock)) { |
---|
| 317 | + spin_unlock(&inode->i_lock); |
---|
| 318 | + continue; |
---|
| 319 | + } |
---|
| 320 | + if (!time_before(jiffies, orangefs_inode->mapping_time)) |
---|
319 | 321 | break; |
---|
320 | | - } /*end while */ |
---|
| 322 | + spin_unlock(&inode->i_lock); |
---|
| 323 | + return 0; |
---|
| 324 | + } |
---|
321 | 325 | |
---|
| 326 | + set_bit(1, bitlock); |
---|
| 327 | + smp_wmb(); |
---|
| 328 | + spin_unlock(&inode->i_lock); |
---|
| 329 | + |
---|
| 330 | + unmap_mapping_range(mapping, 0, 0, 0); |
---|
| 331 | + ret = filemap_write_and_wait(mapping); |
---|
| 332 | + if (!ret) |
---|
| 333 | + ret = invalidate_inode_pages2(mapping); |
---|
| 334 | + |
---|
| 335 | + orangefs_inode->mapping_time = jiffies + |
---|
| 336 | + orangefs_cache_timeout_msecs*HZ/1000; |
---|
| 337 | + |
---|
| 338 | + clear_bit(1, bitlock); |
---|
| 339 | + smp_mb__after_atomic(); |
---|
| 340 | + wake_up_bit(bitlock, 1); |
---|
| 341 | + |
---|
| 342 | + return ret; |
---|
| 343 | +} |
---|
| 344 | + |
---|
| 345 | +static ssize_t orangefs_file_read_iter(struct kiocb *iocb, |
---|
| 346 | + struct iov_iter *iter) |
---|
| 347 | +{ |
---|
| 348 | + int ret; |
---|
| 349 | + orangefs_stats.reads++; |
---|
| 350 | + |
---|
| 351 | + down_read(&file_inode(iocb->ki_filp)->i_rwsem); |
---|
| 352 | + ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
---|
| 353 | + if (ret) |
---|
| 354 | + goto out; |
---|
| 355 | + |
---|
| 356 | + ret = generic_file_read_iter(iocb, iter); |
---|
322 | 357 | out: |
---|
323 | | - if (total_count > 0) |
---|
324 | | - ret = total_count; |
---|
325 | | - if (ret > 0) { |
---|
326 | | - if (type == ORANGEFS_IO_READ) { |
---|
327 | | - file_accessed(file); |
---|
328 | | - } else { |
---|
329 | | - file_update_time(file); |
---|
330 | | - /* |
---|
331 | | - * Must invalidate to ensure write loop doesn't |
---|
332 | | - * prevent kernel from reading updated |
---|
333 | | - * attribute. Size probably changed because of |
---|
334 | | - * the write, and other clients could update |
---|
335 | | - * any other attribute. |
---|
336 | | - */ |
---|
337 | | - orangefs_inode->getattr_time = jiffies - 1; |
---|
338 | | - } |
---|
339 | | - } |
---|
340 | | - |
---|
341 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
342 | | - "%s(%pU): Value(%d) returned.\n", |
---|
343 | | - __func__, |
---|
344 | | - handle, |
---|
345 | | - (int)ret); |
---|
346 | | - |
---|
| 358 | + up_read(&file_inode(iocb->ki_filp)->i_rwsem); |
---|
347 | 359 | return ret; |
---|
348 | 360 | } |
---|
349 | 361 | |
---|
350 | | -/* |
---|
351 | | - * Read data from a specified offset in a file (referenced by inode). |
---|
352 | | - * Data may be placed either in a user or kernel buffer. |
---|
353 | | - */ |
---|
354 | | -ssize_t orangefs_inode_read(struct inode *inode, |
---|
355 | | - struct iov_iter *iter, |
---|
356 | | - loff_t *offset, |
---|
357 | | - loff_t readahead_size) |
---|
| 362 | +static ssize_t orangefs_file_write_iter(struct kiocb *iocb, |
---|
| 363 | + struct iov_iter *iter) |
---|
358 | 364 | { |
---|
359 | | - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
---|
360 | | - size_t count = iov_iter_count(iter); |
---|
361 | | - size_t bufmap_size; |
---|
362 | | - ssize_t ret = -EINVAL; |
---|
363 | | - |
---|
364 | | - orangefs_stats.reads++; |
---|
365 | | - |
---|
366 | | - bufmap_size = orangefs_bufmap_size_query(); |
---|
367 | | - if (count > bufmap_size) { |
---|
368 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
369 | | - "%s: count is too large (%zd/%zd)!\n", |
---|
370 | | - __func__, count, bufmap_size); |
---|
371 | | - return -EINVAL; |
---|
372 | | - } |
---|
373 | | - |
---|
374 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
375 | | - "%s(%pU) %zd@%llu\n", |
---|
376 | | - __func__, |
---|
377 | | - &orangefs_inode->refn.khandle, |
---|
378 | | - count, |
---|
379 | | - llu(*offset)); |
---|
380 | | - |
---|
381 | | - ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter, |
---|
382 | | - count, readahead_size); |
---|
383 | | - if (ret > 0) |
---|
384 | | - *offset += ret; |
---|
385 | | - |
---|
386 | | - gossip_debug(GOSSIP_FILE_DEBUG, |
---|
387 | | - "%s(%pU): Value(%zd) returned.\n", |
---|
388 | | - __func__, |
---|
389 | | - &orangefs_inode->refn.khandle, |
---|
390 | | - ret); |
---|
391 | | - |
---|
392 | | - return ret; |
---|
393 | | -} |
---|
394 | | - |
---|
395 | | -static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
---|
396 | | -{ |
---|
397 | | - struct file *file = iocb->ki_filp; |
---|
398 | | - loff_t pos = iocb->ki_pos; |
---|
399 | | - ssize_t rc = 0; |
---|
400 | | - |
---|
401 | | - BUG_ON(iocb->private); |
---|
402 | | - |
---|
403 | | - gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n"); |
---|
404 | | - |
---|
405 | | - orangefs_stats.reads++; |
---|
406 | | - |
---|
407 | | - rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter); |
---|
408 | | - iocb->ki_pos = pos; |
---|
409 | | - |
---|
410 | | - return rc; |
---|
411 | | -} |
---|
412 | | - |
---|
413 | | -static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) |
---|
414 | | -{ |
---|
415 | | - struct file *file = iocb->ki_filp; |
---|
416 | | - loff_t pos; |
---|
417 | | - ssize_t rc; |
---|
418 | | - |
---|
419 | | - BUG_ON(iocb->private); |
---|
420 | | - |
---|
421 | | - gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n"); |
---|
422 | | - |
---|
423 | | - inode_lock(file->f_mapping->host); |
---|
424 | | - |
---|
425 | | - /* Make sure generic_write_checks sees an up to date inode size. */ |
---|
426 | | - if (file->f_flags & O_APPEND) { |
---|
427 | | - rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, |
---|
428 | | - STATX_SIZE); |
---|
429 | | - if (rc == -ESTALE) |
---|
430 | | - rc = -EIO; |
---|
431 | | - if (rc) { |
---|
432 | | - gossip_err("%s: orangefs_inode_getattr failed, " |
---|
433 | | - "rc:%zd:.\n", __func__, rc); |
---|
434 | | - goto out; |
---|
435 | | - } |
---|
436 | | - } |
---|
437 | | - |
---|
438 | | - rc = generic_write_checks(iocb, iter); |
---|
439 | | - |
---|
440 | | - if (rc <= 0) { |
---|
441 | | - gossip_err("%s: generic_write_checks failed, rc:%zd:.\n", |
---|
442 | | - __func__, rc); |
---|
443 | | - goto out; |
---|
444 | | - } |
---|
445 | | - |
---|
446 | | - /* |
---|
447 | | - * if we are appending, generic_write_checks would have updated |
---|
448 | | - * pos to the end of the file, so we will wait till now to set |
---|
449 | | - * pos... |
---|
450 | | - */ |
---|
451 | | - pos = iocb->ki_pos; |
---|
452 | | - |
---|
453 | | - rc = do_readv_writev(ORANGEFS_IO_WRITE, |
---|
454 | | - file, |
---|
455 | | - &pos, |
---|
456 | | - iter); |
---|
457 | | - if (rc < 0) { |
---|
458 | | - gossip_err("%s: do_readv_writev failed, rc:%zd:.\n", |
---|
459 | | - __func__, rc); |
---|
460 | | - goto out; |
---|
461 | | - } |
---|
462 | | - |
---|
463 | | - iocb->ki_pos = pos; |
---|
| 365 | + int ret; |
---|
464 | 366 | orangefs_stats.writes++; |
---|
465 | 367 | |
---|
466 | | -out: |
---|
| 368 | + if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) { |
---|
| 369 | + ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
---|
| 370 | + if (ret) |
---|
| 371 | + return ret; |
---|
| 372 | + } |
---|
467 | 373 | |
---|
468 | | - inode_unlock(file->f_mapping->host); |
---|
469 | | - return rc; |
---|
| 374 | + ret = generic_file_write_iter(iocb, iter); |
---|
| 375 | + return ret; |
---|
| 376 | +} |
---|
| 377 | + |
---|
| 378 | +static int orangefs_getflags(struct inode *inode, unsigned long *uval) |
---|
| 379 | +{ |
---|
| 380 | + __u64 val = 0; |
---|
| 381 | + int ret; |
---|
| 382 | + |
---|
| 383 | + ret = orangefs_inode_getxattr(inode, |
---|
| 384 | + "user.pvfs2.meta_hint", |
---|
| 385 | + &val, sizeof(val)); |
---|
| 386 | + if (ret < 0 && ret != -ENODATA) |
---|
| 387 | + return ret; |
---|
| 388 | + else if (ret == -ENODATA) |
---|
| 389 | + val = 0; |
---|
| 390 | + *uval = val; |
---|
| 391 | + return 0; |
---|
470 | 392 | } |
---|
471 | 393 | |
---|
472 | 394 | /* |
---|
.. | .. |
---|
474 | 396 | */ |
---|
475 | 397 | static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
---|
476 | 398 | { |
---|
| 399 | + struct inode *inode = file_inode(file); |
---|
477 | 400 | int ret = -ENOTTY; |
---|
478 | 401 | __u64 val = 0; |
---|
479 | 402 | unsigned long uval; |
---|
.. | .. |
---|
487 | 410 | * and append flags |
---|
488 | 411 | */ |
---|
489 | 412 | if (cmd == FS_IOC_GETFLAGS) { |
---|
490 | | - val = 0; |
---|
491 | | - ret = orangefs_inode_getxattr(file_inode(file), |
---|
492 | | - "user.pvfs2.meta_hint", |
---|
493 | | - &val, sizeof(val)); |
---|
494 | | - if (ret < 0 && ret != -ENODATA) |
---|
| 413 | + ret = orangefs_getflags(inode, &uval); |
---|
| 414 | + if (ret) |
---|
495 | 415 | return ret; |
---|
496 | | - else if (ret == -ENODATA) |
---|
497 | | - val = 0; |
---|
498 | | - uval = val; |
---|
499 | 416 | gossip_debug(GOSSIP_FILE_DEBUG, |
---|
500 | 417 | "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", |
---|
501 | 418 | (unsigned long long)uval); |
---|
502 | 419 | return put_user(uval, (int __user *)arg); |
---|
503 | 420 | } else if (cmd == FS_IOC_SETFLAGS) { |
---|
| 421 | + unsigned long old_uval; |
---|
| 422 | + |
---|
504 | 423 | ret = 0; |
---|
505 | 424 | if (get_user(uval, (int __user *)arg)) |
---|
506 | 425 | return -EFAULT; |
---|
.. | .. |
---|
516 | 435 | gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); |
---|
517 | 436 | return -EINVAL; |
---|
518 | 437 | } |
---|
| 438 | + ret = orangefs_getflags(inode, &old_uval); |
---|
| 439 | + if (ret) |
---|
| 440 | + return ret; |
---|
| 441 | + ret = vfs_ioc_setflags_prepare(inode, old_uval, uval); |
---|
| 442 | + if (ret) |
---|
| 443 | + return ret; |
---|
519 | 444 | val = uval; |
---|
520 | 445 | gossip_debug(GOSSIP_FILE_DEBUG, |
---|
521 | 446 | "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", |
---|
522 | 447 | (unsigned long long)val); |
---|
523 | | - ret = orangefs_inode_setxattr(file_inode(file), |
---|
| 448 | + ret = orangefs_inode_setxattr(inode, |
---|
524 | 449 | "user.pvfs2.meta_hint", |
---|
525 | 450 | &val, sizeof(val), 0); |
---|
526 | 451 | } |
---|
.. | .. |
---|
532 | 457 | { |
---|
533 | 458 | struct file *file = vmf->vma->vm_file; |
---|
534 | 459 | int ret; |
---|
535 | | - |
---|
536 | | - ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, |
---|
537 | | - STATX_SIZE); |
---|
| 460 | + ret = orangefs_inode_getattr(file->f_mapping->host, |
---|
| 461 | + ORANGEFS_GETATTR_SIZE); |
---|
538 | 462 | if (ret == -ESTALE) |
---|
539 | 463 | ret = -EIO; |
---|
540 | 464 | if (ret) { |
---|
541 | | - gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", |
---|
542 | | - __func__, ret); |
---|
| 465 | + gossip_err("%s: orangefs_inode_getattr failed, " |
---|
| 466 | + "ret:%d:.\n", __func__, ret); |
---|
543 | 467 | return VM_FAULT_SIGBUS; |
---|
544 | 468 | } |
---|
545 | 469 | return filemap_fault(vmf); |
---|
.. | .. |
---|
548 | 472 | static const struct vm_operations_struct orangefs_file_vm_ops = { |
---|
549 | 473 | .fault = orangefs_fault, |
---|
550 | 474 | .map_pages = filemap_map_pages, |
---|
551 | | - .page_mkwrite = filemap_page_mkwrite, |
---|
| 475 | + .page_mkwrite = orangefs_page_mkwrite, |
---|
552 | 476 | }; |
---|
553 | 477 | |
---|
554 | 478 | /* |
---|
.. | .. |
---|
556 | 480 | */ |
---|
557 | 481 | static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) |
---|
558 | 482 | { |
---|
| 483 | + int ret; |
---|
| 484 | + |
---|
| 485 | + ret = orangefs_revalidate_mapping(file_inode(file)); |
---|
| 486 | + if (ret) |
---|
| 487 | + return ret; |
---|
| 488 | + |
---|
559 | 489 | gossip_debug(GOSSIP_FILE_DEBUG, |
---|
560 | 490 | "orangefs_file_mmap: called on %s\n", |
---|
561 | 491 | (file ? |
---|
562 | 492 | (char *)file->f_path.dentry->d_name.name : |
---|
563 | 493 | (char *)"Unknown")); |
---|
564 | | - |
---|
565 | | - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) |
---|
566 | | - return -EINVAL; |
---|
567 | 494 | |
---|
568 | 495 | /* set the sequential readahead hint */ |
---|
569 | 496 | vma->vm_flags |= VM_SEQ_READ; |
---|
.. | .. |
---|
604 | 531 | gossip_debug(GOSSIP_INODE_DEBUG, |
---|
605 | 532 | "flush_racache finished\n"); |
---|
606 | 533 | } |
---|
607 | | - truncate_inode_pages(file_inode(file)->i_mapping, |
---|
608 | | - 0); |
---|
| 534 | + |
---|
609 | 535 | } |
---|
610 | 536 | return 0; |
---|
611 | 537 | } |
---|
.. | .. |
---|
622 | 548 | struct orangefs_inode_s *orangefs_inode = |
---|
623 | 549 | ORANGEFS_I(file_inode(file)); |
---|
624 | 550 | struct orangefs_kernel_op_s *new_op = NULL; |
---|
| 551 | + |
---|
| 552 | + ret = filemap_write_and_wait_range(file_inode(file)->i_mapping, |
---|
| 553 | + start, end); |
---|
| 554 | + if (ret < 0) |
---|
| 555 | + return ret; |
---|
625 | 556 | |
---|
626 | 557 | new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); |
---|
627 | 558 | if (!new_op) |
---|
.. | .. |
---|
644 | 575 | * Change the file pointer position for an instance of an open file. |
---|
645 | 576 | * |
---|
646 | 577 | * \note If .llseek is overridden, we must acquire lock as described in |
---|
647 | | - * Documentation/filesystems/Locking. |
---|
| 578 | + * Documentation/filesystems/locking.rst. |
---|
648 | 579 | * |
---|
649 | 580 | * Future upgrade could support SEEK_DATA and SEEK_HOLE but would |
---|
650 | 581 | * require many changes to the FS |
---|
.. | .. |
---|
660 | 591 | * NOTE: We are only interested in file size here, |
---|
661 | 592 | * so we set mask accordingly. |
---|
662 | 593 | */ |
---|
663 | | - ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, |
---|
664 | | - STATX_SIZE); |
---|
| 594 | + ret = orangefs_inode_getattr(file->f_mapping->host, |
---|
| 595 | + ORANGEFS_GETATTR_SIZE); |
---|
665 | 596 | if (ret == -ESTALE) |
---|
666 | 597 | ret = -EIO; |
---|
667 | 598 | if (ret) { |
---|
.. | .. |
---|
704 | 635 | return rc; |
---|
705 | 636 | } |
---|
706 | 637 | |
---|
| 638 | +static int orangefs_flush(struct file *file, fl_owner_t id) |
---|
| 639 | +{ |
---|
| 640 | + /* |
---|
| 641 | + * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the |
---|
| 642 | + * service_operation in orangefs_fsync. |
---|
| 643 | + * |
---|
| 644 | + * Do not send fsync to OrangeFS server on a close. Do send fsync |
---|
| 645 | + * on an explicit fsync call. This duplicates historical OrangeFS |
---|
| 646 | + * behavior. |
---|
| 647 | + */ |
---|
| 648 | + int r; |
---|
| 649 | + |
---|
| 650 | + r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); |
---|
| 651 | + if (r > 0) |
---|
| 652 | + return 0; |
---|
| 653 | + else |
---|
| 654 | + return r; |
---|
| 655 | +} |
---|
| 656 | + |
---|
707 | 657 | /** ORANGEFS implementation of VFS file operations */ |
---|
708 | 658 | const struct file_operations orangefs_file_operations = { |
---|
709 | 659 | .llseek = orangefs_file_llseek, |
---|
.. | .. |
---|
713 | 663 | .unlocked_ioctl = orangefs_ioctl, |
---|
714 | 664 | .mmap = orangefs_file_mmap, |
---|
715 | 665 | .open = generic_file_open, |
---|
| 666 | + .flush = orangefs_flush, |
---|
716 | 667 | .release = orangefs_file_release, |
---|
717 | 668 | .fsync = orangefs_fsync, |
---|
718 | 669 | }; |
---|