/* * Copyright (C) 2010 Philippe Gerum . * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ /* * This file implements object clusters, to group various related * runtime objects in named tables. Objects within clusters are * indexed on a string label. Depending on whether shared * multi-processing mode is enabled, clusters may be persistent in the * main heap. * * In its simplest form - when shared multi-processing is disabled -, * a cluster is basically a private hash table only known from the * process who created it. * * When shared multi-processing mode is enabled, a cluster is a shared * hash table indexed on a unique name within the main catalog. * Therefore, all objects referred to by the cluster should be laid * into the main heap as well. Multiple processes attached to the * same copperplate session do share the same main heap. Therefore, * they may share objects by providing: * * - the name of the cluster. * - the name of the object to retrieve from the cluster. * * Having objects shared between processes introduces the requirement * to deal with stale objects, created by processes that don't exist * anymore when a lookup is performed on a cluster by another * process. We deal with this issue as simply as we can, as follows: * * - each object referenced to by a cluster bears a "creator node" * identifier. This is basically the system-wide linux TID of the * process owning the thread which has initially added the object to * the cluster (i.e. getpid() as returned from the NPTL). * * - upon a lookup operation in the cluster which matches an object in * the table, the process who introduced the object is probed for * existence. If the process is gone, we silently drop the reference * to the orphaned object from the cluster, and return a failed lookup * status. Otherwise, the lookup succeeds. * * - when an attempt is made to index an object into cluster, any * conflicting object which bears the same name is checked for * staleness as described for the lookup operation. However, the * insertion succeeds after the reference to a conflicting stale * object was silently discarded. * * The test for existence based on the linux TID may return spurious * "true" results in case an object was created by a long gone * process, whose TID was eventually reused for a newer process, * before the process who initialized the main heap has exited. In * theory, this situation may happen; in practice, 1) the TID * generator has to wrap around fully before this happens, 2) multiple * processes sharing objects via a cluster are normally co-operating * to implement a global functionality. In the event of a process * exit, it is likely that the whole application system should be * reinited, thus the main (session) heap would be reset, which would * in turn clear the issue. * * In the worst case, using a stale object would never cause bad * memory references, since a clustered object - and all the memory * references it does via its members - must be laid into the main * heap, which is persistent until the last process attached to it * leaves the session. * * This stale object detection is essentially a sanity mechanism to * cleanup obviously wrong references from clusters after some process * died unexpectedly. Under normal circumstances, for an orderly exit, * a process should remove all references to objects it has created * from existing clusters, before eventually freeing those objects. * * In addition to the basic cluster object, the synchronizing cluster * (struct syncluster) provides support for waiting for a given object * to appear in the dictionary. */ #include #include #include #include "copperplate/heapobj.h" #include "copperplate/cluster.h" #include "copperplate/syncobj.h" #include "copperplate/threadobj.h" #include "copperplate/debug.h" #include "internal.h" const static struct hash_operations hash_operations; struct cluster_walk_data { struct cluster *c; int (*walk)(struct cluster *c, struct clusterobj *cobj); }; struct pvcluster_walk_data { struct pvcluster *c; int (*walk)(struct pvcluster *c, struct pvclusterobj *cobj); }; #ifdef CONFIG_XENO_PSHARED int cluster_init(struct cluster *c, const char *name) { struct dictionary *d; struct hashobj *hobj; int ret; /* * NOTE: it does not make sense to destroy a shared cluster * since other processes from the same session will likely * have references on it, so there is no cluster_destroy() * routine on purpose. When all processes from the session are * gone, the shared heap is cleared next time the application * boots, so there is really no use of deleting shared * clusters. */ redo: hobj = hash_search(&main_catalog, name, strlen(name), &hash_operations); if (hobj) { d = container_of(hobj, struct dictionary, hobj); ret = 0; goto out; } d = xnmalloc(sizeof(*d)); if (d == NULL) return __bt(-ENOMEM); hash_init(&d->table); ret = hash_enter(&main_catalog, name, strlen(name), &d->hobj, &hash_operations); /* * If someone managed to slip in, creating the cluster between * the table look up and indexing the new cluster, retry the * whole process. */ if (ret == -EEXIST) { hash_destroy(&d->table); xnfree(d); goto redo; } out: c->d = d; return __bt(ret); } static int cluster_probe(struct hashobj *hobj) { struct clusterobj *cobj; cobj = container_of(hobj, struct clusterobj, hobj); if (cobj->cnode == __node_id) return 1; /* Trivial check: is it ours? */ /* * The node identifier is actually the main thread pid, so if * we can send the latter a signal, the node is deemed active. * Over Cobalt, the main thread is always shadowed, therefore * we may use Cobalt's kill() service to probe for it. * Receiving EPERM does mean that we found an active node, * just that we don't have the credentials to actually send it * a signal. */ return copperplate_probe_tid(cobj->cnode) == 0; } int cluster_addobj(struct cluster *c, const char *name, struct clusterobj *cobj) { cobj->cnode = __node_id; /* * Add object to cluster and probe conflicting entries for * owner node existence, overwriting dead instances on the * fly. */ return hash_enter_probe(&c->d->table, name, strlen(name), &cobj->hobj, &hash_operations); } int cluster_addobj_dup(struct cluster *c, const char *name, struct clusterobj *cobj) { cobj->cnode = __node_id; /* * Same as cluster_addobj(), but allows for duplicate keys in * live objects. */ return hash_enter_probe_dup(&c->d->table, name, strlen(name), &cobj->hobj, &hash_operations); } int cluster_delobj(struct cluster *c, struct clusterobj *cobj) { return __bt(hash_remove(&c->d->table, &cobj->hobj, &hash_operations)); } struct clusterobj *cluster_findobj(struct cluster *c, const char *name) { struct hashobj *hobj; /* * Search for object entry and probe for owner node existence, * discarding dead instances on the fly. */ hobj = hash_search_probe(&c->d->table, name, strlen(name), &hash_operations); if (hobj == NULL) return NULL; return container_of(hobj, struct clusterobj, hobj); } static int __cluster_walk(struct hash_table *t, struct hashobj *hobj, void *arg) { struct cluster_walk_data *wd = arg; struct clusterobj *cobj; cobj = container_of(hobj, struct clusterobj, hobj); return wd->walk(wd->c, cobj); } int cluster_walk(struct cluster *c, int (*walk)(struct cluster *c, struct clusterobj *cobj)) { struct cluster_walk_data wd = { .c = c, .walk = walk, }; return hash_walk(&c->d->table, __cluster_walk, &wd); } int syncluster_init(struct syncluster *sc, const char *name) { struct syndictionary *d; struct hashobj *hobj; int ret; redo: hobj = hash_search(&main_catalog, name, strlen(name), &hash_operations); if (hobj) { sc->d = container_of(hobj, struct syndictionary, hobj); return 0; } d = xnmalloc(sizeof(*d)); if (d == NULL) return -ENOMEM; hash_init(&d->table); ret = hash_enter(&main_catalog, name, strlen(name), &d->hobj, &hash_operations); /* * Same as cluster_init(), redo if someone slipped in, * creating the cluster. */ if (ret == -EEXIST) { hash_destroy(&d->table); xnfree(d); goto redo; } sc->d = d; return syncobj_init(&d->sobj, CLOCK_COPPERPLATE, SYNCOBJ_FIFO, fnref_null); } int syncluster_addobj(struct syncluster *sc, const char *name, struct clusterobj *cobj) { struct syncluster_wait_struct *wait; struct threadobj *thobj, *tmp; struct syncstate syns; int ret; ret = syncobj_lock(&sc->d->sobj, &syns); if (ret) return __bt(ret); cobj->cnode = __node_id; ret = hash_enter_probe(&sc->d->table, name, strlen(name), &cobj->hobj, &hash_operations); if (ret) goto out; if (!syncobj_grant_wait_p(&sc->d->sobj)) goto out; /* * Wake up all threads waiting for this key to appear in the * dictionary. */ syncobj_for_each_grant_waiter_safe(&sc->d->sobj, thobj, tmp) { wait = threadobj_get_wait(thobj); if (strcmp(__mptr(wait->name_ref), name) == 0) syncobj_grant_to(&sc->d->sobj, thobj); } out: syncobj_unlock(&sc->d->sobj, &syns); return ret; } int syncluster_delobj(struct syncluster *sc, struct clusterobj *cobj) { struct syncstate syns; int ret; ret = syncobj_lock(&sc->d->sobj, &syns); if (ret) return ret; ret = __bt(hash_remove(&sc->d->table, &cobj->hobj, &hash_operations)); syncobj_unlock(&sc->d->sobj, &syns); return ret; } int syncluster_findobj(struct syncluster *sc, const char *name, const struct timespec *timeout, struct clusterobj **cobjp) { struct syncluster_wait_struct *wait = NULL; struct syncstate syns; struct hashobj *hobj; int ret = 0; ret = syncobj_lock(&sc->d->sobj, &syns); if (ret) return ret; for (;;) { hobj = hash_search_probe(&sc->d->table, name, strlen(name), &hash_operations); if (hobj) { *cobjp = container_of(hobj, struct clusterobj, hobj); break; } if (timeout && timeout->tv_sec == 0 && timeout->tv_nsec == 0) { ret = -EWOULDBLOCK; break; } if (!threadobj_current_p()) { ret = -EPERM; break; } if (wait == NULL) { wait = threadobj_prepare_wait(struct syncluster_wait_struct); wait->name_ref = __moff(xnstrdup(name)); } ret = syncobj_wait_grant(&sc->d->sobj, timeout, &syns); if (ret) { if (ret == -EIDRM) goto out; break; } } syncobj_unlock(&sc->d->sobj, &syns); out: if (wait) { xnfree(__mptr(wait->name_ref)); threadobj_finish_wait(); } return ret; } const static struct hash_operations hash_operations = { .compare = memcmp, .probe = cluster_probe, .alloc = xnmalloc, .free = xnfree, }; const static struct pvhash_operations pvhash_operations = { .compare = memcmp, }; #else /* !CONFIG_XENO_PSHARED */ const static struct hash_operations hash_operations = { .compare = memcmp, }; #endif /* !CONFIG_XENO_PSHARED */ int pvcluster_init(struct pvcluster *c, const char *name) { pvhash_init(&c->table); return 0; } void pvcluster_destroy(struct pvcluster *c) { /* nop */ } int pvcluster_addobj(struct pvcluster *c, const char *name, struct pvclusterobj *cobj) { return pvhash_enter(&c->table, name, strlen(name), &cobj->hobj, &pvhash_operations); } int pvcluster_addobj_dup(struct pvcluster *c, const char *name, struct pvclusterobj *cobj) { return pvhash_enter_dup(&c->table, name, strlen(name), &cobj->hobj, &pvhash_operations); } int pvcluster_delobj(struct pvcluster *c, struct pvclusterobj *cobj) { return __bt(pvhash_remove(&c->table, &cobj->hobj, &pvhash_operations)); } struct pvclusterobj *pvcluster_findobj(struct pvcluster *c, const char *name) { struct pvhashobj *hobj; hobj = pvhash_search(&c->table, name, strlen(name), &pvhash_operations); if (hobj == NULL) return NULL; return container_of(hobj, struct pvclusterobj, hobj); } static int __pvcluster_walk(struct pvhash_table *t, struct pvhashobj *hobj, void *arg) { struct pvcluster_walk_data *wd = arg; struct pvclusterobj *cobj; cobj = container_of(hobj, struct pvclusterobj, hobj); return wd->walk(wd->c, cobj); } int pvcluster_walk(struct pvcluster *c, int (*walk)(struct pvcluster *c, struct pvclusterobj *cobj)) { struct pvcluster_walk_data wd = { .c = c, .walk = walk, }; return pvhash_walk(&c->table, __pvcluster_walk, &wd); } int pvsyncluster_init(struct pvsyncluster *sc, const char *name) { int ret; ret = __bt(pvcluster_init(&sc->c, name)); if (ret) return ret; /* * Assuming pvcluster_destroy() is a nop, so we don't need to * run any finalizer. */ return syncobj_init(&sc->sobj, CLOCK_COPPERPLATE, SYNCOBJ_FIFO, fnref_null); } void pvsyncluster_destroy(struct pvsyncluster *sc) { struct syncstate syns; if (__bt(syncobj_lock(&sc->sobj, &syns))) return; /* No finalizer, we just destroy the synchro. */ syncobj_destroy(&sc->sobj, &syns); } int pvsyncluster_addobj(struct pvsyncluster *sc, const char *name, struct pvclusterobj *cobj) { struct syncluster_wait_struct *wait; struct threadobj *thobj, *tmp; struct syncstate syns; int ret; ret = syncobj_lock(&sc->sobj, &syns); if (ret) return __bt(ret); ret = pvcluster_addobj(&sc->c, name, cobj); if (ret) goto out; if (!syncobj_grant_wait_p(&sc->sobj)) goto out; /* * Wake up all threads waiting for this key to appear in the * dictionary. */ syncobj_for_each_grant_waiter_safe(&sc->sobj, thobj, tmp) { wait = threadobj_get_wait(thobj); if (strcmp(wait->name, name) == 0) syncobj_grant_to(&sc->sobj, thobj); } out: syncobj_unlock(&sc->sobj, &syns); return ret; } int pvsyncluster_delobj(struct pvsyncluster *sc, struct pvclusterobj *cobj) { struct syncstate syns; int ret; ret = syncobj_lock(&sc->sobj, &syns); if (ret) return ret; ret = __bt(pvcluster_delobj(&sc->c, cobj)); syncobj_unlock(&sc->sobj, &syns); return ret; } int pvsyncluster_findobj(struct pvsyncluster *sc, const char *name, const struct timespec *timeout, struct pvclusterobj **cobjp) { struct syncluster_wait_struct *wait = NULL; struct pvclusterobj *cobj; struct syncstate syns; int ret = 0; ret = syncobj_lock(&sc->sobj, &syns); if (ret) return ret; for (;;) { cobj = pvcluster_findobj(&sc->c, name); if (cobj) { *cobjp = cobj; break; } if (timeout && timeout->tv_sec == 0 && timeout->tv_nsec == 0) { ret = -EWOULDBLOCK; break; } if (!threadobj_current_p()) { ret = -EPERM; break; } if (wait == NULL) { wait = threadobj_prepare_wait(struct syncluster_wait_struct); wait->name = name; } ret = syncobj_wait_grant(&sc->sobj, timeout, &syns); if (ret) { if (ret == -EIDRM) goto out; break; } } syncobj_unlock(&sc->sobj, &syns); out: if (wait) threadobj_finish_wait(); return ret; }