hc
2024-01-31 f9004dbfff8a3fbbd7e2a88c8a4327c7f2f8e5b2
kernel/net/smc/smc_core.h
....@@ -32,10 +32,10 @@
3232 };
3333
3434 enum smc_link_state { /* possible states of a link */
35
+ SMC_LNK_UNUSED, /* link is unused */
3536 SMC_LNK_INACTIVE, /* link is inactive */
3637 SMC_LNK_ACTIVATING, /* link is being activated */
3738 SMC_LNK_ACTIVE, /* link is active */
38
- SMC_LNK_DELETING, /* link is being deleted */
3939 };
4040
4141 #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
....@@ -52,6 +52,26 @@
5252 FAILED /* ib_wr_reg_mr response: failure */
5353 };
5454
55
+struct smc_rdma_sge { /* sges for RDMA writes */
56
+ struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE];
57
+};
58
+
59
+#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per
60
+ * message send
61
+ */
62
+
63
+struct smc_rdma_sges { /* sges per message send */
64
+ struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES];
65
+};
66
+
67
+struct smc_rdma_wr { /* work requests per message
68
+ * send
69
+ */
70
+ struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES];
71
+};
72
+
73
+#define SMC_LGR_ID_SIZE 4
74
+
5575 struct smc_link {
5676 struct smc_ib_device *smcibdev; /* ib-device */
5777 u8 ibport; /* port - values 1 | 2 */
....@@ -64,13 +84,17 @@
6484 struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */
6585 struct ib_send_wr *wr_tx_ibs; /* WR send meta data */
6686 struct ib_sge *wr_tx_sges; /* WR send gather meta data */
87
+ struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
88
+ struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */
6789 struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
90
+ struct completion *wr_tx_compl; /* WR send CQE completion */
6891 /* above vectors have wr_tx_cnt elements and use the same index */
6992 dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
7093 atomic_long_t wr_tx_id; /* seq # of last sent WR */
7194 unsigned long *wr_tx_mask; /* bit mask of used indexes */
7295 u32 wr_tx_cnt; /* number of WR send buffers */
7396 wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
97
+ atomic_t wr_tx_refcnt; /* tx refs to link */
7498
7599 struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
76100 struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
....@@ -83,6 +107,7 @@
83107
84108 struct ib_reg_wr wr_reg; /* WR register memory region */
85109 wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
110
+ atomic_t wr_reg_refcnt; /* reg refs to link */
86111 enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
87112
88113 u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
....@@ -95,30 +120,24 @@
95120 u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
96121 u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
97122 u8 link_id; /* unique # within link group */
123
+ u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */
124
+ u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
125
+ u8 link_idx; /* index in lgr link array */
126
+ u8 link_is_asym; /* is link asymmetric? */
127
+ struct smc_link_group *lgr; /* parent link group */
128
+ struct work_struct link_down_wrk; /* wrk to bring link down */
98129
99130 enum smc_link_state state; /* state of link */
100
- struct workqueue_struct *llc_wq; /* single thread work queue */
101
- struct completion llc_confirm; /* wait for rx of conf link */
102
- struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
103
- int llc_confirm_rc; /* rc from confirm link msg */
104
- int llc_confirm_resp_rc; /* rc from conf_resp msg */
105
- struct completion llc_add; /* wait for rx of add link */
106
- struct completion llc_add_resp; /* wait for rx of add link rsp*/
107131 struct delayed_work llc_testlink_wrk; /* testlink worker */
108132 struct completion llc_testlink_resp; /* wait for rx of testlink */
109133 int llc_testlink_time; /* testlink interval */
110
- struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */
111
- int llc_confirm_rkey_rc; /* rc from cnf rkey msg */
112134 };
113135
114136 /* For now we allow at most SMC_LINKS_PER_LGR_MAX parallel links per link
115137 * group. The SMC protocol allows more (up to 8).
116138 */
117
-#define SMC_LINKS_PER_LGR_MAX 1
139
+#define SMC_LINKS_PER_LGR_MAX 3
118140 #define SMC_SINGLE_LINK 0
119
-
120
-#define SMC_FIRST_CONTACT 1 /* first contact to a peer */
121
-#define SMC_REUSE_CONTACT 0 /* follow-on contact to a peer*/
122141
123142 /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
124143 struct smc_buf_desc {
....@@ -127,25 +146,32 @@
127146 struct page *pages;
128147 int len; /* length of buffer */
129148 u32 used; /* currently used / unused */
130
- u8 reused : 1; /* new created / reused */
131
- u8 regerr : 1; /* err during registration */
132149 union {
133150 struct { /* SMC-R */
134
- struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
135
- /* virtual buffer */
136
- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
137
- /* for rmb only: memory region
138
- * incl. rkey provided to peer
139
- */
140
- u32 order; /* allocation order */
151
+ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
152
+ /* virtual buffer */
153
+ struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
154
+ /* for rmb only: memory region
155
+ * incl. rkey provided to peer
156
+ */
157
+ u32 order; /* allocation order */
158
+
159
+ u8 is_conf_rkey;
160
+ /* confirm_rkey done */
161
+ u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX];
162
+ /* mem region registered */
163
+ u8 is_map_ib[SMC_LINKS_PER_LGR_MAX];
164
+ /* mem region mapped to lnk */
165
+ u8 is_reg_err;
166
+ /* buffer registration err */
141167 };
142168 struct { /* SMC-D */
143
- unsigned short sba_idx;
144
- /* SBA index number */
145
- u64 token;
146
- /* DMB token number */
147
- dma_addr_t dma_addr;
148
- /* DMA address */
169
+ unsigned short sba_idx;
170
+ /* SBA index number */
171
+ u64 token;
172
+ /* DMB token number */
173
+ dma_addr_t dma_addr;
174
+ /* DMA address */
149175 };
150176 };
151177 };
....@@ -155,7 +181,6 @@
155181 u32 rkey;
156182 };
157183
158
-#define SMC_LGR_ID_SIZE 4
159184 #define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */
160185 #define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */
161186 /* theoretically, the RFC states that largest size would be 512K,
....@@ -165,6 +190,28 @@
165190
166191 struct smcd_dev;
167192
193
+enum smc_lgr_type { /* redundancy state of lgr */
194
+ SMC_LGR_NONE, /* no active links, lgr to be deleted */
195
+ SMC_LGR_SINGLE, /* 1 active RNIC on each peer */
196
+ SMC_LGR_SYMMETRIC, /* 2 active RNICs on each peer */
197
+ SMC_LGR_ASYMMETRIC_PEER, /* local has 2, peer 1 active RNICs */
198
+ SMC_LGR_ASYMMETRIC_LOCAL, /* local has 1, peer 2 active RNICs */
199
+};
200
+
201
+enum smc_llc_flowtype {
202
+ SMC_LLC_FLOW_NONE = 0,
203
+ SMC_LLC_FLOW_ADD_LINK = 2,
204
+ SMC_LLC_FLOW_DEL_LINK = 4,
205
+ SMC_LLC_FLOW_RKEY = 6,
206
+};
207
+
208
+struct smc_llc_qentry;
209
+
210
+struct smc_llc_flow {
211
+ enum smc_llc_flowtype type;
212
+ struct smc_llc_qentry *qentry;
213
+};
214
+
168215 struct smc_link_group {
169216 struct list_head list;
170217 struct rb_root conns_all; /* connection tree */
....@@ -173,16 +220,24 @@
173220 unsigned short vlan_id; /* vlan id of link group */
174221
175222 struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
176
- rwlock_t sndbufs_lock; /* protects tx buffers */
223
+ struct mutex sndbufs_lock; /* protects tx buffers */
177224 struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
178
- rwlock_t rmbs_lock; /* protects rx buffers */
225
+ struct mutex rmbs_lock; /* protects rx buffers */
179226
180227 u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
181228 struct delayed_work free_work; /* delayed freeing of an lgr */
229
+ struct work_struct terminate_work; /* abnormal lgr termination */
230
+ struct workqueue_struct *tx_wq; /* wq for conn. tx workers */
182231 u8 sync_err : 1; /* lgr no longer fits to peer */
183232 u8 terminating : 1;/* lgr is terminating */
233
+ u8 freeing : 1; /* lgr is being freed */
184234
185235 bool is_smcd; /* SMC-R or SMC-D */
236
+ u8 smc_version;
237
+ u8 negotiated_eid[SMC_MAX_EID_LEN];
238
+ u8 peer_os; /* peer operating system */
239
+ u8 peer_smc_release;
240
+ u8 peer_hostname[SMC_MAX_HOSTNAME_LEN];
186241 union {
187242 struct { /* SMC-R */
188243 enum smc_lgr_role role;
....@@ -196,14 +251,71 @@
196251 /* remote addr/key pairs */
197252 DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
198253 /* used rtoken elements */
254
+ u8 next_link_id;
255
+ enum smc_lgr_type type;
256
+ /* redundancy state */
257
+ u8 pnet_id[SMC_MAX_PNETID_LEN + 1];
258
+ /* pnet id of this lgr */
259
+ struct list_head llc_event_q;
260
+ /* queue for llc events */
261
+ spinlock_t llc_event_q_lock;
262
+ /* protects llc_event_q */
263
+ struct mutex llc_conf_mutex;
264
+ /* protects lgr reconfig. */
265
+ struct work_struct llc_add_link_work;
266
+ struct work_struct llc_del_link_work;
267
+ struct work_struct llc_event_work;
268
+ /* llc event worker */
269
+ wait_queue_head_t llc_flow_waiter;
270
+ /* w4 next llc event */
271
+ wait_queue_head_t llc_msg_waiter;
272
+ /* w4 next llc msg */
273
+ struct smc_llc_flow llc_flow_lcl;
274
+ /* llc local control field */
275
+ struct smc_llc_flow llc_flow_rmt;
276
+ /* llc remote control field */
277
+ struct smc_llc_qentry *delayed_event;
278
+ /* arrived when flow active */
279
+ spinlock_t llc_flow_lock;
280
+ /* protects llc flow */
281
+ int llc_testlink_time;
282
+ /* link keep alive time */
283
+ u32 llc_termination_rsn;
284
+ /* rsn code for termination */
199285 };
200286 struct { /* SMC-D */
201287 u64 peer_gid;
202288 /* Peer GID (remote) */
203289 struct smcd_dev *smcd;
204290 /* ISM device for VLAN reg. */
291
+ u8 peer_shutdown : 1;
292
+ /* peer triggered shutdown */
205293 };
206294 };
295
+};
296
+
297
+struct smc_clc_msg_local;
298
+
299
+struct smc_init_info {
300
+ u8 is_smcd;
301
+ u8 smc_type_v1;
302
+ u8 smc_type_v2;
303
+ u8 first_contact_peer;
304
+ u8 first_contact_local;
305
+ unsigned short vlan_id;
306
+ /* SMC-R */
307
+ struct smc_clc_msg_local *ib_lcl;
308
+ struct smc_ib_device *ib_dev;
309
+ u8 ib_gid[SMC_GID_SIZE];
310
+ u8 ib_port;
311
+ u32 ib_clcqpn;
312
+ /* SMC-D */
313
+ u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
314
+ struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
315
+ u16 ism_chid[SMC_MAX_ISM_DEVS + 1];
316
+ u8 ism_offered_cnt; /* # of ISM devices offered */
317
+ u8 ism_selected; /* index of selected ISM dev*/
318
+ u8 smcd_version;
207319 };
208320
209321 /* Find the connection associated with the given alert token in the link group.
....@@ -239,38 +351,75 @@
239351 return res;
240352 }
241353
354
+/* returns true if the specified link is usable */
355
+static inline bool smc_link_usable(struct smc_link *lnk)
356
+{
357
+ if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE)
358
+ return false;
359
+ return true;
360
+}
361
+
362
+static inline bool smc_link_sendable(struct smc_link *lnk)
363
+{
364
+ return smc_link_usable(lnk) &&
365
+ lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
366
+}
367
+
368
+static inline bool smc_link_active(struct smc_link *lnk)
369
+{
370
+ return lnk->state == SMC_LNK_ACTIVE;
371
+}
372
+
242373 struct smc_sock;
243374 struct smc_clc_msg_accept_confirm;
244375 struct smc_clc_msg_local;
245376
246
-void smc_lgr_free(struct smc_link_group *lgr);
247
-void smc_lgr_forget(struct smc_link_group *lgr);
248
-void smc_lgr_terminate(struct smc_link_group *lgr);
249
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
250
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
377
+void smc_lgr_cleanup_early(struct smc_connection *conn);
378
+void smc_lgr_terminate_sched(struct smc_link_group *lgr);
379
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
380
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
381
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
382
+ unsigned short vlan);
383
+void smc_smcd_terminate_all(struct smcd_dev *dev);
384
+void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
251385 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
252386 int smc_uncompress_bufsize(u8 compressed);
253
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
387
+int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
254388 struct smc_clc_msg_accept_confirm *clc);
255
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
256
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
389
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
390
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
391
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
392
+ __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
393
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
394
+ __be64 nw_vaddr, __be32 nw_rkey);
257395 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
258396 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
259397 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
260398 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
261
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
399
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
262400
263401 void smc_conn_free(struct smc_connection *conn);
264
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
265
- struct smc_ib_device *smcibdev, u8 ibport,
266
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
267
- u64 peer_gid);
268
-void smcd_conn_free(struct smc_connection *conn);
402
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
269403 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
404
+int smc_core_init(void);
270405 void smc_core_exit(void);
406
+
407
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
408
+ u8 link_idx, struct smc_init_info *ini);
409
+void smcr_link_clear(struct smc_link *lnk, bool log);
410
+int smcr_buf_map_lgr(struct smc_link *lnk);
411
+int smcr_buf_reg_lgr(struct smc_link *lnk);
412
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
413
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
414
+ enum smc_lgr_type new_type, int asym_lnk_idx);
415
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
416
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
417
+ struct smc_link *from_lnk, bool is_dev_err);
418
+void smcr_link_down_cond(struct smc_link *lnk);
419
+void smcr_link_down_cond_sched(struct smc_link *lnk);
271420
272421 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
273422 {
274
- return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
423
+ return link->lgr;
275424 }
276425 #endif