| .. | .. |
|---|
| 19 | 19 | */ |
|---|
| 20 | 20 | #define RDS_PROTOCOL_3_0 0x0300 |
|---|
| 21 | 21 | #define RDS_PROTOCOL_3_1 0x0301 |
|---|
| 22 | +#define RDS_PROTOCOL_4_0 0x0400 |
|---|
| 23 | +#define RDS_PROTOCOL_4_1 0x0401 |
|---|
| 22 | 24 | #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 |
|---|
| 23 | 25 | #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) |
|---|
| 24 | 26 | #define RDS_PROTOCOL_MINOR(v) ((v) & 255) |
|---|
| 25 | 27 | #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) |
|---|
| 28 | +#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 |
|---|
| 26 | 29 | |
|---|
| 27 | 30 | /* The following ports, 16385, 18634, 18635, are registered with IANA as |
|---|
| 28 | 31 | * the ports to be used for RDS over TCP and UDP. Currently, only RDS over |
|---|
| .. | .. |
|---|
| 37 | 40 | #ifdef ATOMIC64_INIT |
|---|
| 38 | 41 | #define KERNEL_HAS_ATOMIC64 |
|---|
| 39 | 42 | #endif |
|---|
| 40 | | - |
|---|
| 41 | 43 | #ifdef RDS_DEBUG |
|---|
| 42 | 44 | #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) |
|---|
| 43 | 45 | #else |
|---|
| .. | .. |
|---|
| 47 | 49 | { |
|---|
| 48 | 50 | } |
|---|
| 49 | 51 | #endif |
|---|
| 50 | | - |
|---|
| 51 | | -/* XXX is there one of these somewhere? */ |
|---|
| 52 | | -#define ceil(x, y) \ |
|---|
| 53 | | - ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; }) |
|---|
| 54 | 52 | |
|---|
| 55 | 53 | #define RDS_FRAG_SHIFT 12 |
|---|
| 56 | 54 | #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) |
|---|
| .. | .. |
|---|
| 155 | 153 | struct rds_cong_map *c_fcong; |
|---|
| 156 | 154 | |
|---|
| 157 | 155 | /* Protocol version */ |
|---|
| 156 | + unsigned int c_proposed_version; |
|---|
| 158 | 157 | unsigned int c_version; |
|---|
| 159 | 158 | possible_net_t c_net; |
|---|
| 159 | + |
|---|
| 160 | + /* TOS */ |
|---|
| 161 | + u8 c_tos; |
|---|
| 160 | 162 | |
|---|
| 161 | 163 | struct list_head c_map_item; |
|---|
| 162 | 164 | unsigned long c_map_queued; |
|---|
| .. | .. |
|---|
| 268 | 270 | #define RDS_MSG_RX_END 2 |
|---|
| 269 | 271 | #define RDS_MSG_RX_CMSG 3 |
|---|
| 270 | 272 | |
|---|
| 273 | +/* The following values are whitelisted for usercopy */ |
|---|
| 274 | +struct rds_inc_usercopy { |
|---|
| 275 | + rds_rdma_cookie_t rdma_cookie; |
|---|
| 276 | + ktime_t rx_tstamp; |
|---|
| 277 | +}; |
|---|
| 278 | + |
|---|
| 271 | 279 | struct rds_incoming { |
|---|
| 272 | 280 | refcount_t i_refcount; |
|---|
| 273 | 281 | struct list_head i_item; |
|---|
| .. | .. |
|---|
| 277 | 285 | unsigned long i_rx_jiffies; |
|---|
| 278 | 286 | struct in6_addr i_saddr; |
|---|
| 279 | 287 | |
|---|
| 280 | | - rds_rdma_cookie_t i_rdma_cookie; |
|---|
| 281 | | - struct timeval i_rx_tstamp; |
|---|
| 288 | + struct rds_inc_usercopy i_usercopy; |
|---|
| 282 | 289 | u64 i_rx_lat_trace[RDS_RX_MAX_TRACES]; |
|---|
| 283 | 290 | }; |
|---|
| 284 | 291 | |
|---|
| 285 | 292 | struct rds_mr { |
|---|
| 286 | 293 | struct rb_node r_rb_node; |
|---|
| 287 | | - refcount_t r_refcount; |
|---|
| 294 | + struct kref r_kref; |
|---|
| 288 | 295 | u32 r_key; |
|---|
| 289 | 296 | |
|---|
| 290 | 297 | /* A copy of the creation flags */ |
|---|
| .. | .. |
|---|
| 292 | 299 | unsigned int r_invalidate:1; |
|---|
| 293 | 300 | unsigned int r_write:1; |
|---|
| 294 | 301 | |
|---|
| 295 | | - /* This is for RDS_MR_DEAD. |
|---|
| 296 | | - * It would be nice & consistent to make this part of the above |
|---|
| 297 | | - * bit field here, but we need to use test_and_set_bit. |
|---|
| 298 | | - */ |
|---|
| 299 | | - unsigned long r_state; |
|---|
| 300 | 302 | struct rds_sock *r_sock; /* back pointer to the socket that owns us */ |
|---|
| 301 | 303 | struct rds_transport *r_trans; |
|---|
| 302 | 304 | void *r_trans_private; |
|---|
| 303 | 305 | }; |
|---|
| 304 | | - |
|---|
| 305 | | -/* Flags for mr->r_state */ |
|---|
| 306 | | -#define RDS_MR_DEAD 0 |
|---|
| 307 | 306 | |
|---|
| 308 | 307 | static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) |
|---|
| 309 | 308 | { |
|---|
| .. | .. |
|---|
| 470 | 469 | struct rds_notifier *op_notifier; |
|---|
| 471 | 470 | |
|---|
| 472 | 471 | struct rds_mr *op_rdma_mr; |
|---|
| 472 | + |
|---|
| 473 | + u64 op_odp_addr; |
|---|
| 474 | + struct rds_mr *op_odp_mr; |
|---|
| 473 | 475 | } rdma; |
|---|
| 474 | 476 | struct rm_data_op { |
|---|
| 475 | 477 | unsigned int op_active:1; |
|---|
| 476 | | - unsigned int op_notify:1; |
|---|
| 477 | 478 | unsigned int op_nents; |
|---|
| 478 | 479 | unsigned int op_count; |
|---|
| 479 | 480 | unsigned int op_dmasg; |
|---|
| .. | .. |
|---|
| 566 | 567 | void (*exit)(void); |
|---|
| 567 | 568 | void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, |
|---|
| 568 | 569 | struct rds_sock *rs, u32 *key_ret, |
|---|
| 569 | | - struct rds_connection *conn); |
|---|
| 570 | + struct rds_connection *conn, |
|---|
| 571 | + u64 start, u64 length, int need_odp); |
|---|
| 570 | 572 | void (*sync_mr)(void *trans_private, int direction); |
|---|
| 571 | 573 | void (*free_mr)(void *trans_private, int invalidate); |
|---|
| 572 | 574 | void (*flush_mrs)(void); |
|---|
| 573 | 575 | bool (*t_unloading)(struct rds_connection *conn); |
|---|
| 576 | + u8 (*get_tos_map)(u8 tos); |
|---|
| 574 | 577 | }; |
|---|
| 575 | 578 | |
|---|
| 576 | 579 | /* Bind hash table key length. It is the sum of the size of a struct |
|---|
| .. | .. |
|---|
| 652 | 655 | u8 rs_rx_traces; |
|---|
| 653 | 656 | u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; |
|---|
| 654 | 657 | struct rds_msg_zcopy_queue rs_zcookie_queue; |
|---|
| 658 | + u8 rs_tos; |
|---|
| 655 | 659 | }; |
|---|
| 656 | 660 | |
|---|
| 657 | 661 | static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) |
|---|
| .. | .. |
|---|
| 713 | 717 | uint64_t s_cong_send_blocked; |
|---|
| 714 | 718 | uint64_t s_recv_bytes_added_to_socket; |
|---|
| 715 | 719 | uint64_t s_recv_bytes_removed_from_socket; |
|---|
| 716 | | - |
|---|
| 720 | + uint64_t s_send_stuck_rm; |
|---|
| 717 | 721 | }; |
|---|
| 718 | 722 | |
|---|
| 719 | 723 | /* af_rds.c */ |
|---|
| .. | .. |
|---|
| 760 | 764 | struct rds_connection *rds_conn_create(struct net *net, |
|---|
| 761 | 765 | const struct in6_addr *laddr, |
|---|
| 762 | 766 | const struct in6_addr *faddr, |
|---|
| 763 | | - struct rds_transport *trans, gfp_t gfp, |
|---|
| 767 | + struct rds_transport *trans, |
|---|
| 768 | + u8 tos, gfp_t gfp, |
|---|
| 764 | 769 | int dev_if); |
|---|
| 765 | 770 | struct rds_connection *rds_conn_create_outgoing(struct net *net, |
|---|
| 766 | 771 | const struct in6_addr *laddr, |
|---|
| 767 | 772 | const struct in6_addr *faddr, |
|---|
| 768 | 773 | struct rds_transport *trans, |
|---|
| 769 | | - gfp_t gfp, int dev_if); |
|---|
| 774 | + u8 tos, gfp_t gfp, int dev_if); |
|---|
| 770 | 775 | void rds_conn_shutdown(struct rds_conn_path *cpath); |
|---|
| 771 | 776 | void rds_conn_destroy(struct rds_connection *conn); |
|---|
| 772 | 777 | void rds_conn_drop(struct rds_connection *conn); |
|---|
| 773 | 778 | void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); |
|---|
| 774 | 779 | void rds_conn_connect_if_down(struct rds_connection *conn); |
|---|
| 775 | 780 | void rds_conn_path_connect_if_down(struct rds_conn_path *cp); |
|---|
| 781 | +void rds_check_all_paths(struct rds_connection *conn); |
|---|
| 776 | 782 | void rds_for_each_conn_info(struct socket *sock, unsigned int len, |
|---|
| 777 | 783 | struct rds_info_iterator *iter, |
|---|
| 778 | 784 | struct rds_info_lengths *lens, |
|---|
| .. | .. |
|---|
| 815 | 821 | rds_conn_path_up(struct rds_conn_path *cp) |
|---|
| 816 | 822 | { |
|---|
| 817 | 823 | return atomic_read(&cp->cp_state) == RDS_CONN_UP; |
|---|
| 824 | +} |
|---|
| 825 | + |
|---|
| 826 | +static inline int |
|---|
| 827 | +rds_conn_path_down(struct rds_conn_path *cp) |
|---|
| 828 | +{ |
|---|
| 829 | + return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; |
|---|
| 818 | 830 | } |
|---|
| 819 | 831 | |
|---|
| 820 | 832 | static inline int |
|---|
| .. | .. |
|---|
| 912 | 924 | |
|---|
| 913 | 925 | /* rdma.c */ |
|---|
| 914 | 926 | void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); |
|---|
| 915 | | -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); |
|---|
| 916 | | -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); |
|---|
| 917 | | -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); |
|---|
| 927 | +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen); |
|---|
| 928 | +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen); |
|---|
| 929 | +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen); |
|---|
| 918 | 930 | void rds_rdma_drop_keys(struct rds_sock *rs); |
|---|
| 919 | 931 | int rds_rdma_extra_size(struct rds_rdma_args *args, |
|---|
| 920 | 932 | struct rds_iov_vector *iov); |
|---|
| .. | .. |
|---|
| 932 | 944 | int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, |
|---|
| 933 | 945 | struct cmsghdr *cmsg); |
|---|
| 934 | 946 | |
|---|
| 935 | | -void __rds_put_mr_final(struct rds_mr *mr); |
|---|
| 936 | | -static inline void rds_mr_put(struct rds_mr *mr) |
|---|
| 937 | | -{ |
|---|
| 938 | | - if (refcount_dec_and_test(&mr->r_refcount)) |
|---|
| 939 | | - __rds_put_mr_final(mr); |
|---|
| 940 | | -} |
|---|
| 947 | +void __rds_put_mr_final(struct kref *kref); |
|---|
| 941 | 948 | |
|---|
| 942 | 949 | static inline bool rds_destroy_pending(struct rds_connection *conn) |
|---|
| 943 | 950 | { |
|---|
| .. | .. |
|---|
| 945 | 952 | (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn)); |
|---|
| 946 | 953 | } |
|---|
| 947 | 954 | |
|---|
| 955 | +enum { |
|---|
| 956 | + ODP_NOT_NEEDED, |
|---|
| 957 | + ODP_ZEROBASED, |
|---|
| 958 | + ODP_VIRTUAL |
|---|
| 959 | +}; |
|---|
| 960 | + |
|---|
| 948 | 961 | /* stats.c */ |
|---|
| 949 | 962 | DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); |
|---|
| 950 | 963 | #define rds_stats_inc_which(which, member) do { \ |
|---|