.. | .. |
---|
19 | 19 | */ |
---|
20 | 20 | #define RDS_PROTOCOL_3_0 0x0300 |
---|
21 | 21 | #define RDS_PROTOCOL_3_1 0x0301 |
---|
| 22 | +#define RDS_PROTOCOL_4_0 0x0400 |
---|
| 23 | +#define RDS_PROTOCOL_4_1 0x0401 |
---|
22 | 24 | #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 |
---|
23 | 25 | #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) |
---|
24 | 26 | #define RDS_PROTOCOL_MINOR(v) ((v) & 255) |
---|
25 | 27 | #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) |
---|
| 28 | +#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 |
---|
26 | 29 | |
---|
27 | 30 | /* The following ports, 16385, 18634, 18635, are registered with IANA as |
---|
28 | 31 | * the ports to be used for RDS over TCP and UDP. Currently, only RDS over |
---|
.. | .. |
---|
37 | 40 | #ifdef ATOMIC64_INIT |
---|
38 | 41 | #define KERNEL_HAS_ATOMIC64 |
---|
39 | 42 | #endif |
---|
40 | | - |
---|
41 | 43 | #ifdef RDS_DEBUG |
---|
42 | 44 | #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) |
---|
43 | 45 | #else |
---|
.. | .. |
---|
47 | 49 | { |
---|
48 | 50 | } |
---|
49 | 51 | #endif |
---|
50 | | - |
---|
51 | | -/* XXX is there one of these somewhere? */ |
---|
52 | | -#define ceil(x, y) \ |
---|
53 | | - ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; }) |
---|
54 | 52 | |
---|
55 | 53 | #define RDS_FRAG_SHIFT 12 |
---|
56 | 54 | #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) |
---|
.. | .. |
---|
155 | 153 | struct rds_cong_map *c_fcong; |
---|
156 | 154 | |
---|
157 | 155 | /* Protocol version */ |
---|
| 156 | + unsigned int c_proposed_version; |
---|
158 | 157 | unsigned int c_version; |
---|
159 | 158 | possible_net_t c_net; |
---|
| 159 | + |
---|
| 160 | + /* TOS */ |
---|
| 161 | + u8 c_tos; |
---|
160 | 162 | |
---|
161 | 163 | struct list_head c_map_item; |
---|
162 | 164 | unsigned long c_map_queued; |
---|
.. | .. |
---|
268 | 270 | #define RDS_MSG_RX_END 2 |
---|
269 | 271 | #define RDS_MSG_RX_CMSG 3 |
---|
270 | 272 | |
---|
| 273 | +/* The following values are whitelisted for usercopy */ |
---|
| 274 | +struct rds_inc_usercopy { |
---|
| 275 | + rds_rdma_cookie_t rdma_cookie; |
---|
| 276 | + ktime_t rx_tstamp; |
---|
| 277 | +}; |
---|
| 278 | + |
---|
271 | 279 | struct rds_incoming { |
---|
272 | 280 | refcount_t i_refcount; |
---|
273 | 281 | struct list_head i_item; |
---|
.. | .. |
---|
277 | 285 | unsigned long i_rx_jiffies; |
---|
278 | 286 | struct in6_addr i_saddr; |
---|
279 | 287 | |
---|
280 | | - rds_rdma_cookie_t i_rdma_cookie; |
---|
281 | | - struct timeval i_rx_tstamp; |
---|
| 288 | + struct rds_inc_usercopy i_usercopy; |
---|
282 | 289 | u64 i_rx_lat_trace[RDS_RX_MAX_TRACES]; |
---|
283 | 290 | }; |
---|
284 | 291 | |
---|
285 | 292 | struct rds_mr { |
---|
286 | 293 | struct rb_node r_rb_node; |
---|
287 | | - refcount_t r_refcount; |
---|
| 294 | + struct kref r_kref; |
---|
288 | 295 | u32 r_key; |
---|
289 | 296 | |
---|
290 | 297 | /* A copy of the creation flags */ |
---|
.. | .. |
---|
292 | 299 | unsigned int r_invalidate:1; |
---|
293 | 300 | unsigned int r_write:1; |
---|
294 | 301 | |
---|
295 | | - /* This is for RDS_MR_DEAD. |
---|
296 | | - * It would be nice & consistent to make this part of the above |
---|
297 | | - * bit field here, but we need to use test_and_set_bit. |
---|
298 | | - */ |
---|
299 | | - unsigned long r_state; |
---|
300 | 302 | struct rds_sock *r_sock; /* back pointer to the socket that owns us */ |
---|
301 | 303 | struct rds_transport *r_trans; |
---|
302 | 304 | void *r_trans_private; |
---|
303 | 305 | }; |
---|
304 | | - |
---|
305 | | -/* Flags for mr->r_state */ |
---|
306 | | -#define RDS_MR_DEAD 0 |
---|
307 | 306 | |
---|
308 | 307 | static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) |
---|
309 | 308 | { |
---|
.. | .. |
---|
470 | 469 | struct rds_notifier *op_notifier; |
---|
471 | 470 | |
---|
472 | 471 | struct rds_mr *op_rdma_mr; |
---|
| 472 | + |
---|
| 473 | + u64 op_odp_addr; |
---|
| 474 | + struct rds_mr *op_odp_mr; |
---|
473 | 475 | } rdma; |
---|
474 | 476 | struct rm_data_op { |
---|
475 | 477 | unsigned int op_active:1; |
---|
476 | | - unsigned int op_notify:1; |
---|
477 | 478 | unsigned int op_nents; |
---|
478 | 479 | unsigned int op_count; |
---|
479 | 480 | unsigned int op_dmasg; |
---|
.. | .. |
---|
566 | 567 | void (*exit)(void); |
---|
567 | 568 | void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, |
---|
568 | 569 | struct rds_sock *rs, u32 *key_ret, |
---|
569 | | - struct rds_connection *conn); |
---|
| 570 | + struct rds_connection *conn, |
---|
| 571 | + u64 start, u64 length, int need_odp); |
---|
570 | 572 | void (*sync_mr)(void *trans_private, int direction); |
---|
571 | 573 | void (*free_mr)(void *trans_private, int invalidate); |
---|
572 | 574 | void (*flush_mrs)(void); |
---|
573 | 575 | bool (*t_unloading)(struct rds_connection *conn); |
---|
| 576 | + u8 (*get_tos_map)(u8 tos); |
---|
574 | 577 | }; |
---|
575 | 578 | |
---|
576 | 579 | /* Bind hash table key length. It is the sum of the size of a struct |
---|
.. | .. |
---|
652 | 655 | u8 rs_rx_traces; |
---|
653 | 656 | u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; |
---|
654 | 657 | struct rds_msg_zcopy_queue rs_zcookie_queue; |
---|
| 658 | + u8 rs_tos; |
---|
655 | 659 | }; |
---|
656 | 660 | |
---|
657 | 661 | static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) |
---|
.. | .. |
---|
713 | 717 | uint64_t s_cong_send_blocked; |
---|
714 | 718 | uint64_t s_recv_bytes_added_to_socket; |
---|
715 | 719 | uint64_t s_recv_bytes_removed_from_socket; |
---|
716 | | - |
---|
| 720 | + uint64_t s_send_stuck_rm; |
---|
717 | 721 | }; |
---|
718 | 722 | |
---|
719 | 723 | /* af_rds.c */ |
---|
.. | .. |
---|
760 | 764 | struct rds_connection *rds_conn_create(struct net *net, |
---|
761 | 765 | const struct in6_addr *laddr, |
---|
762 | 766 | const struct in6_addr *faddr, |
---|
763 | | - struct rds_transport *trans, gfp_t gfp, |
---|
| 767 | + struct rds_transport *trans, |
---|
| 768 | + u8 tos, gfp_t gfp, |
---|
764 | 769 | int dev_if); |
---|
765 | 770 | struct rds_connection *rds_conn_create_outgoing(struct net *net, |
---|
766 | 771 | const struct in6_addr *laddr, |
---|
767 | 772 | const struct in6_addr *faddr, |
---|
768 | 773 | struct rds_transport *trans, |
---|
769 | | - gfp_t gfp, int dev_if); |
---|
| 774 | + u8 tos, gfp_t gfp, int dev_if); |
---|
770 | 775 | void rds_conn_shutdown(struct rds_conn_path *cpath); |
---|
771 | 776 | void rds_conn_destroy(struct rds_connection *conn); |
---|
772 | 777 | void rds_conn_drop(struct rds_connection *conn); |
---|
773 | 778 | void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); |
---|
774 | 779 | void rds_conn_connect_if_down(struct rds_connection *conn); |
---|
775 | 780 | void rds_conn_path_connect_if_down(struct rds_conn_path *cp); |
---|
| 781 | +void rds_check_all_paths(struct rds_connection *conn); |
---|
776 | 782 | void rds_for_each_conn_info(struct socket *sock, unsigned int len, |
---|
777 | 783 | struct rds_info_iterator *iter, |
---|
778 | 784 | struct rds_info_lengths *lens, |
---|
.. | .. |
---|
815 | 821 | rds_conn_path_up(struct rds_conn_path *cp) |
---|
816 | 822 | { |
---|
817 | 823 | return atomic_read(&cp->cp_state) == RDS_CONN_UP; |
---|
| 824 | +} |
---|
| 825 | + |
---|
| 826 | +static inline int |
---|
| 827 | +rds_conn_path_down(struct rds_conn_path *cp) |
---|
| 828 | +{ |
---|
| 829 | + return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; |
---|
818 | 830 | } |
---|
819 | 831 | |
---|
820 | 832 | static inline int |
---|
.. | .. |
---|
912 | 924 | |
---|
913 | 925 | /* rdma.c */ |
---|
914 | 926 | void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); |
---|
915 | | -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); |
---|
916 | | -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); |
---|
917 | | -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); |
---|
| 927 | +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen); |
---|
| 928 | +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen); |
---|
| 929 | +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen); |
---|
918 | 930 | void rds_rdma_drop_keys(struct rds_sock *rs); |
---|
919 | 931 | int rds_rdma_extra_size(struct rds_rdma_args *args, |
---|
920 | 932 | struct rds_iov_vector *iov); |
---|
.. | .. |
---|
932 | 944 | int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, |
---|
933 | 945 | struct cmsghdr *cmsg); |
---|
934 | 946 | |
---|
935 | | -void __rds_put_mr_final(struct rds_mr *mr); |
---|
936 | | -static inline void rds_mr_put(struct rds_mr *mr) |
---|
937 | | -{ |
---|
938 | | - if (refcount_dec_and_test(&mr->r_refcount)) |
---|
939 | | - __rds_put_mr_final(mr); |
---|
940 | | -} |
---|
| 947 | +void __rds_put_mr_final(struct kref *kref); |
---|
941 | 948 | |
---|
942 | 949 | static inline bool rds_destroy_pending(struct rds_connection *conn) |
---|
943 | 950 | { |
---|
.. | .. |
---|
945 | 952 | (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn)); |
---|
946 | 953 | } |
---|
947 | 954 | |
---|
| 955 | +enum { |
---|
| 956 | + ODP_NOT_NEEDED, |
---|
| 957 | + ODP_ZEROBASED, |
---|
| 958 | + ODP_VIRTUAL |
---|
| 959 | +}; |
---|
| 960 | + |
---|
948 | 961 | /* stats.c */ |
---|
949 | 962 | DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); |
---|
950 | 963 | #define rds_stats_inc_which(which, member) do { \ |
---|