From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/nfsd/nfs4callback.c | 342 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 254 insertions(+), 88 deletions(-) diff --git a/kernel/fs/nfsd/nfs4callback.c b/kernel/fs/nfsd/nfs4callback.c index 7ee417b..f5b7ad0 100644 --- a/kernel/fs/nfsd/nfs4callback.c +++ b/kernel/fs/nfsd/nfs4callback.c @@ -38,7 +38,9 @@ #include "nfsd.h" #include "state.h" #include "netns.h" +#include "trace.h" #include "xdr4cb.h" +#include "xdr4.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -58,16 +60,6 @@ /* res */ int status; }; - -/* - * Handle decode buffer overflows out-of-line. - */ -static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) -{ - dprintk("NFS: %s prematurely hit the end of our receive buffer. " - "Remaining buffer length is %tu words.\n", - func, xdr->end - xdr->p); -} static __be32 *xdr_encode_empty_array(__be32 *p) { @@ -105,6 +97,7 @@ OP_CB_WANTS_CANCELLED = 12, OP_CB_NOTIFY_LOCK = 13, OP_CB_NOTIFY_DEVICEID = 14, + OP_CB_OFFLOAD = 15, OP_CB_ILLEGAL = 10044 }; @@ -238,7 +231,6 @@ *status = nfs_cb_stat_to_errno(be32_to_cpup(p)); return 0; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; out_unexpected: dprintk("NFSD: Callback server returned operation %d but " @@ -307,7 +299,6 @@ hdr->nops = be32_to_cpup(p); return 0; out_overflow: - print_overflow_msg(__func__, xdr); return -EIO; } @@ -435,7 +426,6 @@ cb->cb_seq_status = status; return status; out_overflow: - print_overflow_msg(__func__, xdr); status = -EIO; goto out; } @@ -523,11 +513,9 @@ if (unlikely(status)) return status; - if (cb != NULL) { - status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_seq_status)) - return status; - } + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } @@ -615,11 +603,10 @@ if (unlikely(status)) return status; - if (cb) { - status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_seq_status)) - return status; - } + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status); } #endif /* CONFIG_NFSD_PNFS */ @@ -674,14 +661,107 @@ if (unlikely(status)) return status; - if (cb) { - status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_seq_status)) - return status; - } + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status); } +/* + * struct write_response4 { + * stateid4 wr_callback_id<1>; + * length4 wr_count; + * stable_how4 wr_committed; + * verifier4 wr_writeverf; + * }; + * union offload_info4 switch (nfsstat4 coa_status) { + * case NFS4_OK: + * write_response4 coa_resok4; + * default: + * length4 coa_bytes_copied; + * }; + * struct CB_OFFLOAD4args { + * nfs_fh4 coa_fh; + * stateid4 coa_stateid; + * offload_info4 coa_offload_info; + * }; + */ +static void encode_offload_info4(struct xdr_stream *xdr, + __be32 nfserr, + const struct nfsd4_copy *cp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = nfserr; + if (!nfserr) { + p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); + p = xdr_encode_empty_array(p); + p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); + *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, + NFS4_VERIFIER_SIZE); + } else { + p = xdr_reserve_space(xdr, 8); + /* We always return success if bytes were written */ + p = xdr_encode_hyper(p, 0); + } +} + +static void encode_cb_offload4args(struct xdr_stream *xdr, + __be32 nfserr, + const struct knfsd_fh *fh, + const struct nfsd4_copy *cp, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, fh); + encode_stateid4(xdr, &cp->cp_res.cb_stateid); + encode_offload_info4(xdr, nfserr, cp); + + hdr->nops++; +} + +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfsd4_callback *cb = data; + const struct nfsd4_copy *cp = + container_of(cb, struct nfsd4_copy, cp_cb); + struct nfs4_cb_compound_hdr hdr = { + .ident = 0, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); + encode_cb_nops(&hdr); +} + +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + + return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status); +} /* * RPC procedure tables */ @@ -703,6 +783,7 @@ PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; @@ -743,27 +824,60 @@ static int max_cb_time(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - return max(nn->nfsd4_lease/10, (time_t)1) * HZ; + + /* + * nfsd4_lease is set to at most one hour in __nfsd4_write_time, + * so we can use 32-bit math on it. Warn if that assumption + * ever stops being true. + */ + if (WARN_ON_ONCE(nn->nfsd4_lease > 3600)) + return 360 * HZ; + + return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ; } -static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) +static struct workqueue_struct *callback_wq; + +static bool nfsd4_queue_cb(struct nfsd4_callback *cb) +{ + return queue_work(callback_wq, &cb->cb_work); +} + +static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) +{ + atomic_inc(&clp->cl_cb_inflight); +} + +static void nfsd41_cb_inflight_end(struct nfs4_client *clp) +{ + + if (atomic_dec_and_test(&clp->cl_cb_inflight)) + wake_up_var(&clp->cl_cb_inflight); +} + +static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp) +{ + wait_var_event(&clp->cl_cb_inflight, + !atomic_read(&clp->cl_cb_inflight)); +} + +static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { - char *principal = clp->cl_cred.cr_targ_princ ? - clp->cl_cred.cr_targ_princ : "nfs"; - struct rpc_cred *cred; + client->cl_principal = clp->cl_cred.cr_targ_princ ? + clp->cl_cred.cr_targ_princ : "nfs"; - cred = rpc_lookup_machine_cred(principal); - if (!IS_ERR(cred)) - get_rpccred(cred); - return cred; + return get_cred(rpc_machine_cred()); } else { - struct rpc_auth *auth = client->cl_auth; - struct auth_cred acred = {}; + struct cred *kcred; - acred.uid = ses->se_cb_sec.uid; - acred.gid = ses->se_cb_sec.gid; - return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0); + kcred = prepare_kernel_cred(NULL); + if (!kcred) + return NULL; + + kcred->fsuid = ses->se_cb_sec.uid; + kcred->fsgid = ses->se_cb_sec.gid; + return kcred; } } @@ -784,14 +898,17 @@ .program = &cb_program, .version = 1, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), + .cred = current_cred(), }; struct rpc_clnt *client; - struct rpc_cred *cred; + const struct cred *cred; if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { + trace_nfsd_cb_setup_err(clp, -EINVAL); return -EINVAL; + } args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; @@ -800,7 +917,6 @@ } else { if (!conn->cb_xprt) return -EINVAL; - clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; @@ -811,24 +927,22 @@ /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { - dprintk("NFSD: couldn't create callback client: %ld\n", - PTR_ERR(client)); + trace_nfsd_cb_setup_err(clp, PTR_ERR(client)); return PTR_ERR(client); } cred = get_backchannel_cred(clp, client, ses); - if (IS_ERR(cred)) { + if (!cred) { + trace_nfsd_cb_setup_err(clp, -ENOMEM); rpc_shutdown_client(client); - return PTR_ERR(cred); + return -ENOMEM; } + + if (clp->cl_minorversion != 0) + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; + trace_nfsd_cb_setup(clp); return 0; -} - -static void warn_no_callback_path(struct nfs4_client *clp, int reason) -{ - dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", - (int)clp->cl_name.len, clp->cl_name.data, reason); } static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) @@ -836,7 +950,7 @@ if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; clp->cl_cb_state = NFSD4_CB_DOWN; - warn_no_callback_path(clp, reason); + trace_nfsd_cb_state(clp); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) @@ -844,26 +958,36 @@ if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; clp->cl_cb_state = NFSD4_CB_FAULT; - warn_no_callback_path(clp, reason); + trace_nfsd_cb_state(clp); } static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + trace_nfsd_cb_done(clp, task->tk_status); if (task->tk_status) nfsd4_mark_cb_down(clp, task->tk_status); - else + else { clp->cl_cb_state = NFSD4_CB_UP; + trace_nfsd_cb_state(clp); + } +} + +static void nfsd4_cb_probe_release(void *calldata) +{ + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + + nfsd41_cb_inflight_end(clp); + } static const struct rpc_call_ops nfsd4_cb_probe_ops = { /* XXX: release method to ensure we set the cb channel down if * necessary on early failure? */ .rpc_call_done = nfsd4_cb_probe_done, + .rpc_release = nfsd4_cb_probe_release, }; - -static struct workqueue_struct *callback_wq; /* * Poke the callback thread to process any updates to the callback @@ -872,6 +996,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) { clp->cl_cb_state = NFSD4_CB_UNKNOWN; + trace_nfsd_cb_state(clp); set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); nfsd4_run_cb(&clp->cl_cb_null); } @@ -888,6 +1013,7 @@ spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); spin_unlock(&clp->cl_lock); + trace_nfsd_cb_state(clp); } /* @@ -895,9 +1021,12 @@ * If the slot is available, then mark it busy. Otherwise, set the * thread for sleeping on the callback RPC wait queue. */ -static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) +static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task) { - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + struct nfs4_client *clp = cb->cb_clp; + + if (!cb->cb_holds_slot && + test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); /* Race breaker */ if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { @@ -906,7 +1035,29 @@ } rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } + cb->cb_holds_slot = true; return true; +} + +static void nfsd41_cb_release_slot(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + if (cb->cb_holds_slot) { + cb->cb_holds_slot = false; + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_wake_up_next(&clp->cl_cb_waitq); + } +} + +static void nfsd41_destroy_cb(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + nfsd41_cb_release_slot(cb); + if (cb->cb_ops && cb->cb_ops->release) + cb->cb_ops->release(cb); + nfsd41_cb_inflight_end(clp); } /* @@ -925,11 +1076,8 @@ */ cb->cb_seq_status = 1; cb->cb_status = 0; - if (minorversion) { - if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task)) - return; - cb->cb_holds_slot = true; - } + if (minorversion && !nfsd41_cb_get_slot(cb, task)) + return; rpc_call_start(task); } @@ -949,7 +1097,7 @@ * the submission code will error out, so we don't need to * handle that case here. */ - if (task->tk_flags & RPC_TASK_KILLED) + if (RPC_SIGNALLED(task)) goto need_restart; return true; @@ -971,7 +1119,7 @@ break; case -ESERVERFAULT: ++session->se_cb_seq_nr; - /* Fall through */ + fallthrough; case 1: case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); @@ -992,17 +1140,16 @@ } break; default: + nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); dprintk("%s: unprocessed error %d\n", __func__, cb->cb_seq_status); } - cb->cb_holds_slot = false; - clear_bit(0, &clp->cl_cb_slot_busy); - rpc_wake_up_next(&clp->cl_cb_waitq); + nfsd41_cb_release_slot(cb); dprintk("%s: freed slot, new seqid=%d\n", __func__, clp->cl_cb_session->se_cb_seq_nr); - if (task->tk_flags & RPC_TASK_KILLED) + if (RPC_SIGNALLED(task)) goto need_restart; out: return ret; @@ -1011,8 +1158,10 @@ ret = false; goto out; need_restart: - task->tk_status = 0; - cb->cb_need_restart = true; + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { + task->tk_status = 0; + cb->cb_need_restart = true; + } return false; } @@ -1021,8 +1170,7 @@ struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; - dprintk("%s: minorversion=%d\n", __func__, - clp->cl_minorversion); + trace_nfsd_cb_done(clp, task->tk_status); if (!nfsd4_cb_sequence_done(task, cb)) return; @@ -1038,10 +1186,12 @@ rpc_restart_call_prepare(task); return; case 1: - break; - case -1: - /* Network partition? */ - nfsd4_mark_cb_down(clp, task->tk_status); + switch (task->tk_status) { + case -EIO: + case -ETIMEDOUT: + case -EACCES: + nfsd4_mark_cb_down(clp, task->tk_status); + } break; default: BUG(); @@ -1053,9 +1203,9 @@ struct nfsd4_callback *cb = calldata; if (cb->cb_need_restart) - nfsd4_run_cb(cb); + nfsd4_queue_cb(cb); else - cb->cb_ops->release(cb); + nfsd41_destroy_cb(cb); } @@ -1089,6 +1239,7 @@ */ nfsd4_run_cb(&clp->cl_cb_null); flush_workqueue(callback_wq); + nfsd41_cb_inflight_wait_complete(clp); } /* requires cl_lock: */ @@ -1106,6 +1257,12 @@ return NULL; } +/* + * Note there isn't a lot of locking in this code; instead we depend on + * the fact that it is run from the callback_wq, which won't run two + * work items at once. So, for example, callback_wq handles all access + * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client. + */ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) { struct nfs4_cb_conn conn; @@ -1119,9 +1276,10 @@ * kill the old client: */ if (clp->cl_cb_client) { + trace_nfsd_cb_shutdown(clp); rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; - put_rpccred(clp->cl_cb_cred); + put_cred(clp->cl_cb_cred); clp->cl_cb_cred = NULL; } if (clp->cl_cb_conn.cb_xprt) { @@ -1162,6 +1320,9 @@ container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; + int flags; + + trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name); if (cb->cb_need_restart) { cb->cb_need_restart = false; @@ -1176,8 +1337,7 @@ clnt = clp->cl_cb_client; if (!clnt) { /* Callback channel broken, or client killed; give up: */ - if (cb->cb_ops && cb->cb_ops->release) - cb->cb_ops->release(cb); + nfsd41_destroy_cb(cb); return; } @@ -1186,11 +1346,13 @@ */ if (!cb->cb_ops && clp->cl_minorversion) { clp->cl_cb_state = NFSD4_CB_UP; + nfsd41_destroy_cb(cb); return; } cb->cb_msg.rpc_cred = clp->cl_cb_cred; - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); } @@ -1211,5 +1373,9 @@ void nfsd4_run_cb(struct nfsd4_callback *cb) { - queue_work(callback_wq, &cb->cb_work); + struct nfs4_client *clp = cb->cb_clp; + + nfsd41_cb_inflight_begin(clp); + if (!nfsd4_queue_cb(cb)) + nfsd41_cb_inflight_end(clp); } -- Gitblit v1.6.2