[LEDE-DEV] [PATCH] procd: stop service using SIGKILL if SIGTERM failed to do so
John Crispin
john at phrozen.org
Thu Feb 9 02:54:08 PST 2017
Hi,
I know that someone else is about to send a fix for the same issue, but
with a different approach. I'd like to wait for this second patch to
arrive before we decide which one to merge.
John
On 09/02/2017 11:02, Alin Nastac wrote:
> SIGKILL is sent if the instance process is still running
> <term_timeout> seconds after SIGTERM has been sent. To prevent
> another daemon process from being launched before the old process
> dies, the instance is kept until SIGCHLD confirms that the service
> has been stopped.
>
> Signed-off-by: Alin Nastac <alin.nastac at gmail.com>
> ---
> service/instance.c | 44 +++++++++++++++++++++++++++++++++++++-------
> service/instance.h | 1 +
> service/service.c | 26 ++++++++++++++++----------
> service/service.h | 3 +++
> 4 files changed, 57 insertions(+), 17 deletions(-)
>
> diff --git a/service/instance.c b/service/instance.c
> index 018db3c..4d340fd 100644
> --- a/service/instance.c
> +++ b/service/instance.c
> @@ -55,6 +55,7 @@ enum {
> INSTANCE_ATTR_SECCOMP,
> INSTANCE_ATTR_PIDFILE,
> INSTANCE_ATTR_RELOADSIG,
> + INSTANCE_ATTR_TERMTIMEOUT,
> __INSTANCE_ATTR_MAX
> };
>
> @@ -79,6 +80,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
> [INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING },
> [INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING },
> [INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 },
> + [INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 },
> };
>
> enum {
> @@ -389,8 +391,16 @@ instance_start(struct service_instance *in)
> return;
> }
>
> - if (in->proc.pending || !in->command)
> + if (!in->command) {
> + LOG("Not starting instance %s::%s, command not set\n", in->srv->name, in->name);
> return;
> + }
> +
> + if (in->proc.pending) {
> + if (in->halt)
> + in->restart = true;
> + return;
> + }
>
> instance_free_stdio(in);
> if (in->_stdout.fd.fd > -2) {
> @@ -408,7 +418,7 @@ instance_start(struct service_instance *in)
> }
>
> in->restart = false;
> - in->halt = !in->respawn;
> + in->halt = false;
>
> if (!in->valid)
> return;
> @@ -494,7 +504,11 @@ instance_timeout(struct uloop_timeout *t)
>
> in = container_of(t, struct service_instance, timeout);
>
> - if (!in->halt && (in->restart || in->respawn))
> + if (in->halt) {
> + LOG("Instance %s::%s pid %d not stopped on SIGTERM, sending SIGKILL instead\n",
> + in->srv->name, in->name, in->proc.pid);
> + kill(in->proc.pid, SIGKILL);
> + } else if (in->restart || in->respawn)
> instance_start(in);
> }
>
> @@ -515,8 +529,19 @@ instance_exit(struct uloop_process *p, int ret)
> return;
>
> uloop_timeout_cancel(&in->timeout);
> + service_event("instance.stop", in->srv->name, in->name);
> +
> if (in->halt) {
> instance_removepid(in);
> + if (in->restart)
> + instance_start(in);
> + else {
> + struct service *s = in->srv;
> +
> + avl_delete(&s->instances.avl, &in->node.avl);
> + instance_free(in);
> + service_stopped(s);
> + }
> } else if (in->restart) {
> instance_start(in);
> } else if (in->respawn) {
> @@ -535,7 +560,6 @@ instance_exit(struct uloop_process *p, int ret)
> uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000);
> }
> }
> - service_event("instance.stop", in->srv->name, in->name);
> }
>
> void
> @@ -546,6 +570,7 @@ instance_stop(struct service_instance *in)
> in->halt = true;
> in->restart = in->respawn = false;
> kill(in->proc.pid, SIGTERM);
> + uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
> }
>
> static void
> @@ -559,10 +584,10 @@ instance_restart(struct service_instance *in)
> return;
> }
>
> - in->halt = false;
> + in->halt = true;
> in->restart = true;
> kill(in->proc.pid, SIGTERM);
> - instance_removepid(in);
> + uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
> }
>
> static bool
> @@ -796,6 +821,8 @@ instance_config_parse(struct service_instance *in)
> if (!instance_config_parse_command(in, tb))
> return false;
>
> + if (tb[INSTANCE_ATTR_TERMTIMEOUT])
> + in->term_timeout = blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]);
> if (tb[INSTANCE_ATTR_RESPAWN]) {
> int i = 0;
> uint32_t vals[3] = { 3600, 5, 5};
> @@ -933,8 +960,9 @@ instance_update(struct service_instance *in, struct service_instance *in_new)
> {
> bool changed = instance_config_changed(in, in_new);
> bool running = in->proc.pending;
> + bool stopping = in->halt;
>
> - if (!running) {
> + if (!running || stopping) {
> instance_config_move(in, in_new);
> instance_start(in);
> } else {
> @@ -967,6 +995,7 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr *
> in->config = config;
> in->timeout.cb = instance_timeout;
> in->proc.cb = instance_exit;
> + in->term_timeout = 5;
>
> in->_stdout.fd.fd = -2;
> in->_stdout.stream.string_data = true;
> @@ -999,6 +1028,7 @@ void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose)
> blobmsg_add_u32(b, "pid", in->proc.pid);
> if (in->command)
> blobmsg_add_blob(b, in->command);
> + blobmsg_add_u32(b, "term_timeout", in->term_timeout);
>
> if (!avl_is_empty(&in->errors.avl)) {
> struct blobmsg_list_node *var;
> diff --git a/service/instance.h b/service/instance.h
> index 3cc2009..78999c8 100644
> --- a/service/instance.h
> +++ b/service/instance.h
> @@ -59,6 +59,7 @@ struct service_instance {
> char *seccomp;
> char *pidfile;
>
> + uint32_t term_timeout;
> uint32_t respawn_timeout;
> uint32_t respawn_threshold;
> uint32_t respawn_retry;
> diff --git a/service/service.c b/service/service.c
> index 2c73901..0584ee0 100644
> --- a/service/service.c
> +++ b/service/service.c
> @@ -59,11 +59,10 @@ service_instance_update(struct vlist_tree *tree, struct vlist_node *node_new,
> instance_update(in_o, in_n);
> instance_free(in_n);
> } else if (in_o) {
> - DEBUG(2, "Free instance %s::%s\n", in_o->srv->name, in_o->name);
> + DEBUG(2, "Stop instance %s::%s\n", in_o->srv->name, in_o->name);
> instance_stop(in_o);
> - instance_free(in_o);
> } else if (in_n) {
> - DEBUG(2, "Create instance %s::%s\n", in_n->srv->name, in_n->name);
> + DEBUG(2, "Start instance %s::%s\n", in_n->srv->name, in_n->name);
> instance_start(in_n);
> }
> blob_buf_init(&b, 0);
> @@ -80,7 +79,7 @@ service_alloc(const char *name)
> strcpy(new_name, name);
>
> vlist_init(&s->instances, avl_strcmp, service_instance_update);
> - s->instances.keep_old = true;
> + s->instances.no_delete = true;
> s->name = new_name;
> s->avl.key = s->name;
> INIT_LIST_HEAD(&s->validators);
> @@ -149,13 +148,8 @@ service_update(struct service *s, struct blob_attr **tb, bool add)
> static void
> service_delete(struct service *s)
> {
> - service_event("service.stop", s->name, NULL);
> vlist_flush_all(&s->instances);
> - avl_delete(&services, &s->avl);
> - trigger_del(s);
> - free(s->trigger);
> - free(s);
> - service_validate_del(s);
> + service_stopped(s);
> }
>
> enum {
> @@ -606,6 +600,18 @@ service_start_early(char *name, char *cmdline)
> return service_handle_set(NULL, NULL, NULL, "add", b.head);
> }
>
> +void service_stopped(struct service *s)
> +{
> + if (avl_is_empty(&s->instances.avl)) {
> + service_event("service.stop", s->name, NULL);
> + avl_delete(&services, &s->avl);
> + trigger_del(s);
> + free(s->trigger);
> + free(s);
> + service_validate_del(s);
> + }
> +}
> +
> void service_event(const char *type, const char *service, const char *instance)
> {
> if (!ctx)
> diff --git a/service/service.h b/service/service.h
> index c3f2964..d4f0a83 100644
> --- a/service/service.h
> +++ b/service/service.h
> @@ -50,7 +50,10 @@ void service_validate_add(struct service *s, struct blob_attr *attr);
> void service_validate_dump(struct blob_buf *b, struct service *s);
> void service_validate_dump_all(struct blob_buf *b, char *p, char *s);
> int service_start_early(char *name, char *cmdline);
> +void service_stopped(struct service *s);
> void service_validate_del(struct service *s);
> void service_event(const char *type, const char *service, const char *instance);
>
> +
> +
> #endif
>
More information about the Lede-dev
mailing list