hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/rds/rds.h
....@@ -19,10 +19,13 @@
1919 */
2020 #define RDS_PROTOCOL_3_0 0x0300
2121 #define RDS_PROTOCOL_3_1 0x0301
22
+#define RDS_PROTOCOL_4_0 0x0400
23
+#define RDS_PROTOCOL_4_1 0x0401
2224 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1
2325 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8)
2426 #define RDS_PROTOCOL_MINOR(v) ((v) & 255)
2527 #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
28
+#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1
2629
2730 /* The following ports, 16385, 18634, 18635, are registered with IANA as
2831 * the ports to be used for RDS over TCP and UDP. Currently, only RDS over
....@@ -37,7 +40,6 @@
3740 #ifdef ATOMIC64_INIT
3841 #define KERNEL_HAS_ATOMIC64
3942 #endif
40
-
4143 #ifdef RDS_DEBUG
4244 #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
4345 #else
....@@ -47,10 +49,6 @@
4749 {
4850 }
4951 #endif
50
-
51
-/* XXX is there one of these somewhere? */
52
-#define ceil(x, y) \
53
- ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; })
5452
5553 #define RDS_FRAG_SHIFT 12
5654 #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
....@@ -155,8 +153,12 @@
155153 struct rds_cong_map *c_fcong;
156154
157155 /* Protocol version */
156
+ unsigned int c_proposed_version;
158157 unsigned int c_version;
159158 possible_net_t c_net;
159
+
160
+ /* TOS */
161
+ u8 c_tos;
160162
161163 struct list_head c_map_item;
162164 unsigned long c_map_queued;
....@@ -268,6 +270,12 @@
268270 #define RDS_MSG_RX_END 2
269271 #define RDS_MSG_RX_CMSG 3
270272
273
+/* The following values are whitelisted for usercopy */
274
+struct rds_inc_usercopy {
275
+ rds_rdma_cookie_t rdma_cookie;
276
+ ktime_t rx_tstamp;
277
+};
278
+
271279 struct rds_incoming {
272280 refcount_t i_refcount;
273281 struct list_head i_item;
....@@ -277,14 +285,13 @@
277285 unsigned long i_rx_jiffies;
278286 struct in6_addr i_saddr;
279287
280
- rds_rdma_cookie_t i_rdma_cookie;
281
- struct timeval i_rx_tstamp;
288
+ struct rds_inc_usercopy i_usercopy;
282289 u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
283290 };
284291
285292 struct rds_mr {
286293 struct rb_node r_rb_node;
287
- refcount_t r_refcount;
294
+ struct kref r_kref;
288295 u32 r_key;
289296
290297 /* A copy of the creation flags */
....@@ -292,18 +299,10 @@
292299 unsigned int r_invalidate:1;
293300 unsigned int r_write:1;
294301
295
- /* This is for RDS_MR_DEAD.
296
- * It would be nice & consistent to make this part of the above
297
- * bit field here, but we need to use test_and_set_bit.
298
- */
299
- unsigned long r_state;
300302 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
301303 struct rds_transport *r_trans;
302304 void *r_trans_private;
303305 };
304
-
305
-/* Flags for mr->r_state */
306
-#define RDS_MR_DEAD 0
307306
308307 static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
309308 {
....@@ -470,10 +469,12 @@
470469 struct rds_notifier *op_notifier;
471470
472471 struct rds_mr *op_rdma_mr;
472
+
473
+ u64 op_odp_addr;
474
+ struct rds_mr *op_odp_mr;
473475 } rdma;
474476 struct rm_data_op {
475477 unsigned int op_active:1;
476
- unsigned int op_notify:1;
477478 unsigned int op_nents;
478479 unsigned int op_count;
479480 unsigned int op_dmasg;
....@@ -566,11 +567,13 @@
566567 void (*exit)(void);
567568 void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
568569 struct rds_sock *rs, u32 *key_ret,
569
- struct rds_connection *conn);
570
+ struct rds_connection *conn,
571
+ u64 start, u64 length, int need_odp);
570572 void (*sync_mr)(void *trans_private, int direction);
571573 void (*free_mr)(void *trans_private, int invalidate);
572574 void (*flush_mrs)(void);
573575 bool (*t_unloading)(struct rds_connection *conn);
576
+ u8 (*get_tos_map)(u8 tos);
574577 };
575578
576579 /* Bind hash table key length. It is the sum of the size of a struct
....@@ -652,6 +655,7 @@
652655 u8 rs_rx_traces;
653656 u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
654657 struct rds_msg_zcopy_queue rs_zcookie_queue;
658
+ u8 rs_tos;
655659 };
656660
657661 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
....@@ -713,7 +717,7 @@
713717 uint64_t s_cong_send_blocked;
714718 uint64_t s_recv_bytes_added_to_socket;
715719 uint64_t s_recv_bytes_removed_from_socket;
716
-
720
+ uint64_t s_send_stuck_rm;
717721 };
718722
719723 /* af_rds.c */
....@@ -760,19 +764,21 @@
760764 struct rds_connection *rds_conn_create(struct net *net,
761765 const struct in6_addr *laddr,
762766 const struct in6_addr *faddr,
763
- struct rds_transport *trans, gfp_t gfp,
767
+ struct rds_transport *trans,
768
+ u8 tos, gfp_t gfp,
764769 int dev_if);
765770 struct rds_connection *rds_conn_create_outgoing(struct net *net,
766771 const struct in6_addr *laddr,
767772 const struct in6_addr *faddr,
768773 struct rds_transport *trans,
769
- gfp_t gfp, int dev_if);
774
+ u8 tos, gfp_t gfp, int dev_if);
770775 void rds_conn_shutdown(struct rds_conn_path *cpath);
771776 void rds_conn_destroy(struct rds_connection *conn);
772777 void rds_conn_drop(struct rds_connection *conn);
773778 void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
774779 void rds_conn_connect_if_down(struct rds_connection *conn);
775780 void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
781
+void rds_check_all_paths(struct rds_connection *conn);
776782 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
777783 struct rds_info_iterator *iter,
778784 struct rds_info_lengths *lens,
....@@ -815,6 +821,12 @@
815821 rds_conn_path_up(struct rds_conn_path *cp)
816822 {
817823 return atomic_read(&cp->cp_state) == RDS_CONN_UP;
824
+}
825
+
826
+static inline int
827
+rds_conn_path_down(struct rds_conn_path *cp)
828
+{
829
+ return atomic_read(&cp->cp_state) == RDS_CONN_DOWN;
818830 }
819831
820832 static inline int
....@@ -912,9 +924,9 @@
912924
913925 /* rdma.c */
914926 void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
915
-int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
916
-int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
917
-int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
927
+int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
928
+int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen);
929
+int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
918930 void rds_rdma_drop_keys(struct rds_sock *rs);
919931 int rds_rdma_extra_size(struct rds_rdma_args *args,
920932 struct rds_iov_vector *iov);
....@@ -932,12 +944,7 @@
932944 int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
933945 struct cmsghdr *cmsg);
934946
935
-void __rds_put_mr_final(struct rds_mr *mr);
936
-static inline void rds_mr_put(struct rds_mr *mr)
937
-{
938
- if (refcount_dec_and_test(&mr->r_refcount))
939
- __rds_put_mr_final(mr);
940
-}
947
+void __rds_put_mr_final(struct kref *kref);
941948
942949 static inline bool rds_destroy_pending(struct rds_connection *conn)
943950 {
....@@ -945,6 +952,12 @@
945952 (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
946953 }
947954
955
+enum {
956
+ ODP_NOT_NEEDED,
957
+ ODP_ZEROBASED,
958
+ ODP_VIRTUAL
959
+};
960
+
948961 /* stats.c */
949962 DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
950963 #define rds_stats_inc_which(which, member) do { \