From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/net/ceph/ceph_common.c | 568 +++++++++++++++++++++++++++++++------------------------- 1 files changed, 315 insertions(+), 253 deletions(-) diff --git a/kernel/net/ceph/ceph_common.c b/kernel/net/ceph/ceph_common.c index 20f0c3d..4e7edd7 100644 --- a/kernel/net/ceph/ceph_common.c +++ b/kernel/net/ceph/ceph_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/ceph/ceph_debug.h> #include <linux/backing-dev.h> @@ -10,8 +11,9 @@ #include <linux/module.h> #include <linux/mount.h> #include <linux/nsproxy.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/statfs.h> @@ -174,6 +176,10 @@ } } + ret = ceph_compare_crush_locs(&opt1->crush_locs, &opt2->crush_locs); + if (ret) + return ret; + /* any matching mon ip implies a match */ for (i = 0; i < opt1->num_mon; i++) { if (ceph_monmap_contains(client->monc.monmap, @@ -184,17 +190,32 @@ } EXPORT_SYMBOL(ceph_compare_options); +/* + * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are + * compatible with (a superset of) GFP_KERNEL. This is because while the + * actual pages are allocated with the specified flags, the page table pages + * are always allocated with GFP_KERNEL. + * + * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO. + */ void *ceph_kvmalloc(size_t size, gfp_t flags) { - if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - void *ptr = kmalloc(size, flags | __GFP_NOWARN); - if (ptr) - return ptr; + void *p; + + if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) { + p = kvmalloc(size, flags); + } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) { + unsigned int nofs_flag = memalloc_nofs_save(); + p = kvmalloc(size, GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); + } else { + unsigned int noio_flag = memalloc_noio_save(); + p = kvmalloc(size, GFP_KERNEL); + memalloc_noio_restore(noio_flag); } - return __vmalloc(size, flags, PAGE_KERNEL); + return p; } - static int parse_fsid(const char *str, struct ceph_fsid *fsid) { @@ -236,56 +257,93 @@ Opt_mount_timeout, Opt_osd_idle_ttl, Opt_osd_request_timeout, - Opt_last_int, /* int args above */ Opt_fsid, Opt_name, Opt_secret, Opt_key, Opt_ip, - Opt_last_string, + Opt_crush_location, + Opt_read_from_replica, /* string args above */ Opt_share, - Opt_noshare, Opt_crc, - Opt_nocrc, Opt_cephx_require_signatures, - Opt_nocephx_require_signatures, Opt_cephx_sign_messages, - Opt_nocephx_sign_messages, Opt_tcp_nodelay, - Opt_notcp_nodelay, + Opt_abort_on_full, }; -static match_table_t opt_tokens = { - {Opt_osdtimeout, "osdtimeout=%d"}, - {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, - {Opt_mount_timeout, "mount_timeout=%d"}, - {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, - {Opt_osd_request_timeout, "osd_request_timeout=%d"}, - /* int args above */ - {Opt_fsid, "fsid=%s"}, - {Opt_name, "name=%s"}, - {Opt_secret, "secret=%s"}, - {Opt_key, "key=%s"}, - {Opt_ip, "ip=%s"}, - /* string args above */ - {Opt_share, "share"}, - {Opt_noshare, "noshare"}, - {Opt_crc, "crc"}, - {Opt_nocrc, "nocrc"}, - {Opt_cephx_require_signatures, "cephx_require_signatures"}, - {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, - {Opt_cephx_sign_messages, "cephx_sign_messages"}, - {Opt_nocephx_sign_messages, "nocephx_sign_messages"}, - {Opt_tcp_nodelay, "tcp_nodelay"}, - {Opt_notcp_nodelay, "notcp_nodelay"}, - {-1, NULL} +enum { + Opt_read_from_replica_no, + Opt_read_from_replica_balance, + Opt_read_from_replica_localize, }; + +static const struct constant_table ceph_param_read_from_replica[] = { + {"no", Opt_read_from_replica_no}, + {"balance", Opt_read_from_replica_balance}, + {"localize", Opt_read_from_replica_localize}, + {} +}; + +static const struct fs_parameter_spec ceph_parameters[] = { + fsparam_flag ("abort_on_full", Opt_abort_on_full), + fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures), + fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages), + fsparam_flag_no ("crc", Opt_crc), + fsparam_string ("crush_location", Opt_crush_location), + fsparam_string ("fsid", Opt_fsid), + fsparam_string ("ip", Opt_ip), + fsparam_string ("key", Opt_key), + fsparam_u32 ("mount_timeout", Opt_mount_timeout), + fsparam_string ("name", Opt_name), + fsparam_u32 ("osd_idle_ttl", Opt_osd_idle_ttl), + fsparam_u32 ("osd_request_timeout", Opt_osd_request_timeout), + fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), + __fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout, + fs_param_deprecated, NULL), + fsparam_enum ("read_from_replica", Opt_read_from_replica, + ceph_param_read_from_replica), + fsparam_string ("secret", Opt_secret), + fsparam_flag_no ("share", Opt_share), + fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay), + {} +}; + +struct ceph_options *ceph_alloc_options(void) +{ + struct ceph_options *opt; + + opt = kzalloc(sizeof(*opt), GFP_KERNEL); + if (!opt) + return NULL; + + opt->crush_locs = RB_ROOT; + opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), + GFP_KERNEL); + if (!opt->mon_addr) { + kfree(opt); + return NULL; + } + + opt->flags = CEPH_OPT_DEFAULT; + opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; + opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; + return opt; +} +EXPORT_SYMBOL(ceph_alloc_options); void ceph_destroy_options(struct ceph_options *opt) { dout("destroy_options %p\n", opt); + if (!opt) + return; + + ceph_clear_crush_locs(&opt->crush_locs); kfree(opt->name); if (opt->key) { ceph_crypto_key_destroy(opt->key); @@ -297,7 +355,9 @@ EXPORT_SYMBOL(ceph_destroy_options); /* get secret from key store */ -static int get_secret(struct ceph_crypto_key *dst, const char *name) { +static int get_secret(struct ceph_crypto_key *dst, const char *name, + struct p_log *log) +{ struct key *ukey; int key_err; int err = 0; @@ -310,20 +370,20 @@ key_err = PTR_ERR(ukey); switch (key_err) { case -ENOKEY: - pr_warn("ceph: Mount failed due to key not found: %s\n", - name); + error_plog(log, "Failed due to key not found: %s", + name); break; case -EKEYEXPIRED: - pr_warn("ceph: Mount failed due to expired key: %s\n", - name); + error_plog(log, "Failed due to expired key: %s", + name); break; case -EKEYREVOKED: - pr_warn("ceph: Mount failed due to revoked key: %s\n", - name); + error_plog(log, "Failed due to revoked key: %s", + name); break; default: - pr_warn("ceph: Mount failed due to unknown key error %d: %s\n", - key_err, name); + error_plog(log, "Failed due to key error %d: %s", + key_err, name); } err = -EPERM; goto out; @@ -341,218 +401,191 @@ return err; } -struct ceph_options * -ceph_parse_options(char *options, const char *dev_name, - const char *dev_name_end, - int (*parse_extra_token)(char *c, void *private), - void *private) +int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, + struct fc_log *l) { - struct ceph_options *opt; - const char *c; - int err = -ENOMEM; - substring_t argstr[MAX_OPT_ARGS]; + struct p_log log = {.prefix = "libceph", .log = l}; + int ret; - opt = kzalloc(sizeof(*opt), GFP_KERNEL); - if (!opt) - return ERR_PTR(-ENOMEM); - opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), - GFP_KERNEL); - if (!opt->mon_addr) - goto out; - - dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, - dev_name); - - /* start with defaults */ - opt->flags = CEPH_OPT_DEFAULT; - opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; - opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; - - /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ - err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, - CEPH_MAX_MON, &opt->num_mon); - if (err < 0) - goto out; - - /* parse mount options */ - while ((c = strsep(&options, ",")) != NULL) { - int token, intval; - if (!*c) - continue; - err = -EINVAL; - token = match_token((char *)c, opt_tokens, argstr); - if (token < 0 && parse_extra_token) { - /* extra? */ - err = parse_extra_token((char *)c, private); - if (err < 0) { - pr_err("bad option at '%s'\n", c); - goto out; - } - continue; - } - if (token < Opt_last_int) { - err = match_int(&argstr[0], &intval); - if (err < 0) { - pr_err("bad option arg (not int) at '%s'\n", c); - goto out; - } - dout("got int token %d val %d\n", token, intval); - } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else { - dout("got token %d\n", token); - } - switch (token) { - case Opt_ip: - err = ceph_parse_ips(argstr[0].from, - argstr[0].to, - &opt->my_addr, - 1, NULL); - if (err < 0) - goto out; - opt->flags |= CEPH_OPT_MYIP; - break; - - case Opt_fsid: - err = parse_fsid(argstr[0].from, &opt->fsid); - if (err == 0) - opt->flags |= CEPH_OPT_FSID; - break; - case Opt_name: - kfree(opt->name); - opt->name = kstrndup(argstr[0].from, - argstr[0].to-argstr[0].from, - GFP_KERNEL); - if (!opt->name) { - err = -ENOMEM; - goto out; - } - break; - case Opt_secret: - ceph_crypto_key_destroy(opt->key); - kfree(opt->key); - - opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); - if (!opt->key) { - err = -ENOMEM; - goto out; - } - err = ceph_crypto_key_unarmor(opt->key, argstr[0].from); - if (err < 0) - goto out; - break; - case Opt_key: - ceph_crypto_key_destroy(opt->key); - kfree(opt->key); - - opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); - if (!opt->key) { - err = -ENOMEM; - goto out; - } - err = get_secret(opt->key, argstr[0].from); - if (err < 0) - goto out; - break; - - /* misc */ - case Opt_osdtimeout: - pr_warn("ignoring deprecated osdtimeout option\n"); - break; - case Opt_osdkeepalivetimeout: - /* 0 isn't well defined right now, reject it */ - if (intval < 1 || intval > INT_MAX / 1000) { - pr_err("osdkeepalive out of range\n"); - err = -EINVAL; - goto out; - } - opt->osd_keepalive_timeout = - msecs_to_jiffies(intval * 1000); - break; - case Opt_osd_idle_ttl: - /* 0 isn't well defined right now, reject it */ - if (intval < 1 || intval > INT_MAX / 1000) { - pr_err("osd_idle_ttl out of range\n"); - err = -EINVAL; - goto out; - } - opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000); - break; - case Opt_mount_timeout: - /* 0 is "wait forever" (i.e. infinite timeout) */ - if (intval < 0 || intval > INT_MAX / 1000) { - pr_err("mount_timeout out of range\n"); - err = -EINVAL; - goto out; - } - opt->mount_timeout = msecs_to_jiffies(intval * 1000); - break; - case Opt_osd_request_timeout: - /* 0 is "wait forever" (i.e. infinite timeout) */ - if (intval < 0 || intval > INT_MAX / 1000) { - pr_err("osd_request_timeout out of range\n"); - err = -EINVAL; - goto out; - } - opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); - break; - - case Opt_share: - opt->flags &= ~CEPH_OPT_NOSHARE; - break; - case Opt_noshare: - opt->flags |= CEPH_OPT_NOSHARE; - break; - - case Opt_crc: - opt->flags &= ~CEPH_OPT_NOCRC; - break; - case Opt_nocrc: - opt->flags |= CEPH_OPT_NOCRC; - break; - - case Opt_cephx_require_signatures: - opt->flags &= ~CEPH_OPT_NOMSGAUTH; - break; - case Opt_nocephx_require_signatures: - opt->flags |= CEPH_OPT_NOMSGAUTH; - break; - case Opt_cephx_sign_messages: - opt->flags &= ~CEPH_OPT_NOMSGSIGN; - break; - case Opt_nocephx_sign_messages: - opt->flags |= CEPH_OPT_NOMSGSIGN; - break; - - case Opt_tcp_nodelay: - opt->flags |= CEPH_OPT_TCP_NODELAY; - break; - case Opt_notcp_nodelay: - opt->flags &= ~CEPH_OPT_TCP_NODELAY; - break; - - default: - BUG_ON(token); - } + ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON, + &opt->num_mon); + if (ret) { + error_plog(&log, "Failed to parse monitor IPs: %d", ret); + return ret; } - /* success */ - return opt; - -out: - ceph_destroy_options(opt); - return ERR_PTR(err); + return 0; } -EXPORT_SYMBOL(ceph_parse_options); +EXPORT_SYMBOL(ceph_parse_mon_ips); -int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) +int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, + struct fc_log *l) +{ + struct fs_parse_result result; + int token, err; + struct p_log log = {.prefix = "libceph", .log = l}; + + token = __fs_parse(&log, ceph_parameters, param, &result); + dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); + if (token < 0) + return token; + + switch (token) { + case Opt_ip: + err = ceph_parse_ips(param->string, + param->string + param->size, + &opt->my_addr, + 1, NULL); + if (err) { + error_plog(&log, "Failed to parse ip: %d", err); + return err; + } + opt->flags |= CEPH_OPT_MYIP; + break; + + case Opt_fsid: + err = parse_fsid(param->string, &opt->fsid); + if (err) { + error_plog(&log, "Failed to parse fsid: %d", err); + return err; + } + opt->flags |= CEPH_OPT_FSID; + break; + case Opt_name: + kfree(opt->name); + opt->name = param->string; + param->string = NULL; + break; + case Opt_secret: + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) + return -ENOMEM; + err = ceph_crypto_key_unarmor(opt->key, param->string); + if (err) { + error_plog(&log, "Failed to parse secret: %d", err); + return err; + } + break; + case Opt_key: + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) + return -ENOMEM; + return get_secret(opt->key, param->string, &log); + case Opt_crush_location: + ceph_clear_crush_locs(&opt->crush_locs); + err = ceph_parse_crush_location(param->string, + &opt->crush_locs); + if (err) { + error_plog(&log, "Failed to parse CRUSH location: %d", + err); + return err; + } + break; + case Opt_read_from_replica: + switch (result.uint_32) { + case Opt_read_from_replica_no: + opt->read_from_replica = 0; + break; + case Opt_read_from_replica_balance: + opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS; + break; + case Opt_read_from_replica_localize: + opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS; + break; + default: + BUG(); + } + break; + + case Opt_osdtimeout: + warn_plog(&log, "Ignoring osdtimeout"); + break; + case Opt_osdkeepalivetimeout: + /* 0 isn't well defined right now, reject it */ + if (result.uint_32 < 1 || result.uint_32 > INT_MAX / 1000) + goto out_of_range; + opt->osd_keepalive_timeout = + msecs_to_jiffies(result.uint_32 * 1000); + break; + case Opt_osd_idle_ttl: + /* 0 isn't well defined right now, reject it */ + if (result.uint_32 < 1 || result.uint_32 > INT_MAX / 1000) + goto out_of_range; + opt->osd_idle_ttl = msecs_to_jiffies(result.uint_32 * 1000); + break; + case Opt_mount_timeout: + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (result.uint_32 > INT_MAX / 1000) + goto out_of_range; + opt->mount_timeout = msecs_to_jiffies(result.uint_32 * 1000); + break; + case Opt_osd_request_timeout: + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (result.uint_32 > INT_MAX / 1000) + goto out_of_range; + opt->osd_request_timeout = + msecs_to_jiffies(result.uint_32 * 1000); + break; + + case Opt_share: + if (!result.negated) + opt->flags &= ~CEPH_OPT_NOSHARE; + else + opt->flags |= CEPH_OPT_NOSHARE; + break; + case Opt_crc: + if (!result.negated) + opt->flags &= ~CEPH_OPT_NOCRC; + else + opt->flags |= CEPH_OPT_NOCRC; + break; + case Opt_cephx_require_signatures: + if (!result.negated) + opt->flags &= ~CEPH_OPT_NOMSGAUTH; + else + opt->flags |= CEPH_OPT_NOMSGAUTH; + break; + case Opt_cephx_sign_messages: + if (!result.negated) + opt->flags &= ~CEPH_OPT_NOMSGSIGN; + else + opt->flags |= CEPH_OPT_NOMSGSIGN; + break; + case Opt_tcp_nodelay: + if (!result.negated) + opt->flags |= CEPH_OPT_TCP_NODELAY; + else + opt->flags &= ~CEPH_OPT_TCP_NODELAY; + break; + + case Opt_abort_on_full: + opt->flags |= CEPH_OPT_ABORT_ON_FULL; + break; + + default: + BUG(); + } + + return 0; + +out_of_range: + return inval_plog(&log, "%s out of range", param->key); +} +EXPORT_SYMBOL(ceph_parse_param); + +int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, + bool show_all) { struct ceph_options *opt = client->options; size_t pos = m->count; + struct rb_node *n; if (opt->name) { seq_puts(m, "name="); @@ -561,6 +594,28 @@ } if (opt->key) seq_puts(m, "secret=<hidden>,"); + + if (!RB_EMPTY_ROOT(&opt->crush_locs)) { + seq_puts(m, "crush_location="); + for (n = rb_first(&opt->crush_locs); ; ) { + struct crush_loc_node *loc = + rb_entry(n, struct crush_loc_node, cl_node); + + seq_printf(m, "%s:%s", loc->cl_loc.cl_type_name, + loc->cl_loc.cl_name); + n = rb_next(n); + if (!n) + break; + + seq_putc(m, '|'); + } + seq_putc(m, ','); + } + if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) { + seq_puts(m, "read_from_replica=balance,"); + } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { + seq_puts(m, "read_from_replica=localize,"); + } if (opt->flags & CEPH_OPT_FSID) seq_printf(m, "fsid=%pU,", &opt->fsid); @@ -574,6 +629,8 @@ seq_puts(m, "nocephx_sign_messages,"); if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) seq_puts(m, "notcp_nodelay,"); + if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) + seq_puts(m, "abort_on_full,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) seq_printf(m, "mount_timeout=%d,", @@ -684,6 +741,14 @@ } EXPORT_SYMBOL(ceph_destroy_client); +void ceph_reset_client_addr(struct ceph_client *client) +{ + ceph_messenger_reset_nonce(&client->msgr); + ceph_monc_reopen_session(&client->monc); + ceph_osdc_reopen_osds(&client->osdc); +} +EXPORT_SYMBOL(ceph_reset_client_addr); + /* * true if we have the mon map (and have thus joined the cluster) */ @@ -766,9 +831,7 @@ { int ret = 0; - ret = ceph_debugfs_init(); - if (ret < 0) - goto out; + ceph_debugfs_init(); ret = ceph_crypto_init(); if (ret < 0) @@ -793,7 +856,6 @@ ceph_crypto_shutdown(); out_debugfs: ceph_debugfs_cleanup(); -out: return ret; } -- Gitblit v1.6.2