From: Paul Eggert Date: Fri, 23 Nov 2012 22:20:31 +0000 (-0800) Subject: Fix a race condition with glib (Bug#8855). X-Git-Tag: emacs-24.2.90~6 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=6d4e8f62e93b575a1da2cd2b4abeb9dce56e1e52;p=emacs.git Fix a race condition with glib (Bug#8855). This is a backport from the trunk, consisting of: 2012-11-17 Eli Zaretskii * nt/inc/sys/wait.h: New file, with prototype of waitpid and definitions of macros it needs. * nt/inc/ms-w32.h (wait): Don't define, 'wait' is not used anymore. (sys_wait): Remove prototype. * nt/config.nt (HAVE_SYS_WAIT_H): Define to 1. * src/w32proc.c (create_child): Don't clip the PID of the child process to fit into an Emacs integer, as this is no longer a restriction. (waitpid): Rename from sys_wait. Emulate a Posix 'waitpid' by reaping only the process specified by PID argument, if that is positive. Use PID instead of dead_child to know which process to reap. Wait for the child to die only if WNOHANG is not in OPTIONS. (sys_select): Don't set dead_child. * src/sysdep.c (wait_for_termination_1): Remove the WINDOWSNT portion, as it is no longer needed. * src/process.c (waitpid, WUNTRACED) [!WNOHANG]: Remove definitions, no longer needed. (record_child_status_change): Remove the setting of record_at_most_one_child for the !WNOHANG case. 2012-11-03 Paul Eggert Fix a race condition that causes Emacs to mess up glib (Bug#8855). This is a backport from the trunk. The symptom is a diagnostic "GLib-WARNING **: In call to g_spawn_sync(), exit status of a child process was requested but SIGCHLD action was set to SIG_IGN and ECHILD was received by waitpid(), so exit status can't be returned." The diagnostic is partly wrong, as the SIGCHLD action is not set to SIG_IGN. The real bug is a race condition between Emacs and glib: Emacs does a waitpid (-1, ...) and reaps glib's subprocess by mistake, so that glib can't find it. 
Work around the bug by invoking waitpid only on subprocesses that Emacs itself creates. * src/process.c (create_process, record_child_status_change): Don't use special value -1 in pid field, as the caller now must know the pid rather than having the callee infer it. The inference was sometimes incorrect anyway, due to another race. (create_process): Set new 'alive' member if child is created. (process_status_retrieved): New function. (record_child_status_change): Use it. Accept negative 1st argument, which means to wait for the processes that Emacs already knows about. Move special-case code for DOS_NT (which lacks WNOHANG) here, from caller. Keep track of processes that have already been waited for, by testing and clearing new 'alive' member. (CAN_HANDLE_MULTIPLE_CHILDREN): Remove, as record_child_status_change now does this internally. (handle_child_signal): Let record_child_status_change do all the work, since we do not want to reap all exited child processes, only the child processes that Emacs itself created. * src/process.h (Lisp_Process): New boolean member 'alive'. --- diff --git a/nt/ChangeLog b/nt/ChangeLog index f11f1fc5fc5..b4537de0d4f 100644 --- a/nt/ChangeLog +++ b/nt/ChangeLog @@ -1,3 +1,16 @@ +2012-11-23 Paul Eggert + + Fix a race condition with glib (Bug#8855). + This is a backport from the trunk, consisting of: + + 2012-11-17 Eli Zaretskii + + * inc/sys/wait.h: New file, with prototype of waitpid and + definitions of macros it needs. + * inc/ms-w32.h (wait): Don't define, 'wait' is not used anymore. + (sys_wait): Remove prototype. + * config.nt (HAVE_SYS_WAIT_H): Define to 1. + 2012-11-20 Eli Zaretskii * nmake.defs: Use !if, not !ifdef. See diff --git a/nt/config.nt b/nt/config.nt index ed1cddf1e12..7e82283b41a 100644 --- a/nt/config.nt +++ b/nt/config.nt @@ -963,7 +963,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ #undef HAVE_SYS_VLIMIT_H /* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. 
*/ -#undef HAVE_SYS_WAIT_H +#define HAVE_SYS_WAIT_H 1 /* Define to 1 if you have the <term.h> header file. */ #undef HAVE_TERM_H diff --git a/nt/inc/ms-w32.h b/nt/inc/ms-w32.h index dd2ae781cb8..a9a3049f9b0 100644 --- a/nt/inc/ms-w32.h +++ b/nt/inc/ms-w32.h @@ -185,15 +185,12 @@ extern char *getenv (); /* Subprocess calls that are emulated. */ #define spawnve sys_spawnve -#define wait sys_wait #define kill sys_kill #define signal sys_signal /* Internal signals. */ #define emacs_raise(sig) emacs_abort() -extern int sys_wait (int *); - /* termcap.c calls that are emulated. */ #define tputs sys_tputs #define tgetstr sys_tgetstr diff --git a/nt/inc/sys/wait.h b/nt/inc/sys/wait.h new file mode 100644 index 00000000000..8d890c9e175 --- /dev/null +++ b/nt/inc/sys/wait.h @@ -0,0 +1,33 @@ +/* A limited emulation of sys/wait.h on Posix systems. + +Copyright (C) 2012 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef INC_SYS_WAIT_H_ +#define INC_SYS_WAIT_H_ + +#define WNOHANG 1 +#define WUNTRACED 2 +#define WSTOPPED 2 /* same as WUNTRACED */ +#define WEXITED 4 +#define WCONTINUED 8 + +/* The various WIF* macros are defined in src/syswait.h. 
*/ + +extern pid_t waitpid (pid_t, int *, int); + +#endif /* INC_SYS_WAIT_H_ */ diff --git a/src/ChangeLog b/src/ChangeLog index 88fbf7a99f2..655eb1595c0 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,60 @@ +2012-11-23 Paul Eggert + + Fix a race condition with glib (Bug#8855). + This is a backport from the trunk, consisting of: + + 2012-11-17 Eli Zaretskii + + * w32proc.c (create_child): Don't clip the PID of the child + process to fit into an Emacs integer, as this is no longer a + restriction. + (waitpid): Rename from sys_wait. Emulate a Posix 'waitpid' by + reaping only the process specified by PID argument, if that is + positive. Use PID instead of dead_child to know which process to + reap. Wait for the child to die only if WNOHANG is not in + OPTIONS. + (sys_select): Don't set dead_child. + + * sysdep.c (wait_for_termination_1): Remove the WINDOWSNT portion, + as it is no longer needed. + + * process.c (waitpid, WUNTRACED) [!WNOHANG]: Remove definitions, + no longer needed. + (record_child_status_change): Remove the setting of + record_at_most_one_child for the !WNOHANG case. + + 2012-11-03 Paul Eggert + + Fix a race condition that causes Emacs to mess up glib (Bug#8855). + This is a backport from the trunk. + The symptom is a diagnostic "GLib-WARNING **: In call to + g_spawn_sync(), exit status of a child process was requested but + SIGCHLD action was set to SIG_IGN and ECHILD was received by + waitpid(), so exit status can't be returned." The diagnostic + is partly wrong, as the SIGCHLD action is not set to SIG_IGN. + The real bug is a race condition between Emacs and glib: Emacs + does a waitpid (-1, ...) and reaps glib's subprocess by mistake, + so that glib can't find it. Work around the bug by invoking + waitpid only on subprocesses that Emacs itself creates. + * process.c (create_process, record_child_status_change): + Don't use special value -1 in pid field, as the caller now must + know the pid rather than having the callee infer it. 
The + inference was sometimes incorrect anyway, due to another race. + (create_process): Set new 'alive' member if child is created. + (process_status_retrieved): New function. + (record_child_status_change): Use it. + Accept negative 1st argument, which means to wait for the + processes that Emacs already knows about. Move special-case code + for DOS_NT (which lacks WNOHANG) here, from caller. Keep track of + processes that have already been waited for, by testing and + clearing new 'alive' member. + (CAN_HANDLE_MULTIPLE_CHILDREN): Remove, as record_child_status_change + now does this internally. + (handle_child_signal): Let record_child_status_change do all + the work, since we do not want to reap all exited child processes, + only the child processes that Emacs itself created. + * process.h (Lisp_Process): New boolean member 'alive'. + 2012-11-23 Eli Zaretskii * xdisp.c (set_cursor_from_row): Skip step 2 only if point is not diff --git a/src/process.c b/src/process.c index 77e99ead01f..c095d13293b 100644 --- a/src/process.c +++ b/src/process.c @@ -130,14 +130,6 @@ extern int sys_select (int, SELECT_TYPE *, SELECT_TYPE *, SELECT_TYPE *, EMACS_TIME *, void *); #endif -#ifndef WNOHANG -# undef waitpid -# define waitpid(pid, status, options) wait (status) -#endif -#ifndef WUNTRACED -# define WUNTRACED 0 -#endif - /* Work around GCC 4.7.0 bug with strict overflow checking; see . These lines can be removed once the GCC bug is fixed. */ @@ -795,9 +787,8 @@ get_process (register Lisp_Object name) #ifdef SIGCHLD /* Fdelete_process promises to immediately forget about the process, but in reality, Emacs needs to remember those processes until they have been - treated by the SIGCHLD handler; otherwise this handler would consider the - process as being synchronous and say that the synchronous process is - dead. */ + treated by the SIGCHLD handler and waitpid has been invoked on them; + otherwise they might fill up the kernel's process table. 
*/ static Lisp_Object deleted_pid_list; #endif @@ -1704,16 +1695,7 @@ create_process (Lisp_Object process, char **new_argv, Lisp_Object current_dir) if (inchannel > max_process_desc) max_process_desc = inchannel; - /* Until we store the proper pid, enable the SIGCHLD handler - to recognize an unknown pid as standing for this process. - It is very important not to let this `marker' value stay - in the table after this function has returned; if it does - it might cause call-process to hang and subsequent asynchronous - processes to get their return values scrambled. */ - XPROCESS (process)->pid = -1; - - /* This must be called after the above line because it may signal an - error. */ + /* This may signal an error. */ setup_process_coding_systems (process); encoded_current_dir = ENCODE_FILE (current_dir); @@ -1880,6 +1862,8 @@ create_process (Lisp_Object process, char **new_argv, Lisp_Object current_dir) #endif XPROCESS (process)->pid = pid; + if (0 <= pid) + XPROCESS (process)->alive = 1; /* Stop blocking signals in the parent. */ #ifdef SIGCHLD @@ -6273,9 +6257,35 @@ process has been transmitted to the serial port. */) return process; } -/* On receipt of a signal that a child status has changed, loop asking - about children with changed statuses until the system says there - are no more. +/* If the status of the process DESIRED has changed, return true and + set *STATUS to its exit status; otherwise, return false. + If HAVE is nonnegative, assume that HAVE = waitpid (HAVE, STATUS, ...) + has already been invoked, and do not invoke waitpid again. */ + +static bool +process_status_retrieved (pid_t desired, pid_t have, int *status) +{ + if (have < 0) + { + /* Invoke waitpid only with a known process ID; do not invoke + waitpid with a nonpositive argument. Otherwise, Emacs might + reap an unwanted process by mistake. For example, invoking + waitpid (-1, ...) can mess up glib by reaping glib's subprocesses, + so that another thread running glib won't find them. 
*/ + do + have = waitpid (desired, status, WNOHANG | WUNTRACED); + while (have < 0 && errno == EINTR); + } + + return have == desired; +} + +/* If PID is nonnegative, the child process PID with wait status W has + changed its status; record this and return true. + + If PID is negative, ignore W, and look for known child processes + of Emacs whose status have changed. For each one found, record its new + status. All we do is change the status; we do not run sentinels or print notifications. That is saved for the next time keyboard input is @@ -6298,13 +6308,15 @@ process has been transmitted to the serial port. */) ** Malloc WARNING: This should never call malloc either directly or indirectly; if it does, that is a bug */ -/* Record the changed status of the child process PID with wait status W. */ void record_child_status_change (pid_t pid, int w) { #ifdef SIGCHLD - Lisp_Object proc; - struct Lisp_Process *p; + + /* On POSIXish hosts, record at most one child only if we already + know one child that has exited. */ + bool record_at_most_one_child = 0 <= pid; + Lisp_Object tail; /* Find the process that signaled us, and record its status. */ @@ -6312,68 +6324,69 @@ record_child_status_change (pid_t pid, int w) /* The process can have been deleted by Fdelete_process. */ for (tail = deleted_pid_list; CONSP (tail); tail = XCDR (tail)) { + bool all_pids_are_fixnums + = (MOST_NEGATIVE_FIXNUM <= TYPE_MINIMUM (pid_t) + && TYPE_MAXIMUM (pid_t) <= MOST_POSITIVE_FIXNUM); Lisp_Object xpid = XCAR (tail); - if ((INTEGERP (xpid) && pid == XINT (xpid)) - || (FLOATP (xpid) && pid == XFLOAT_DATA (xpid))) + if (all_pids_are_fixnums ? 
INTEGERP (xpid) : NUMBERP (xpid)) { - XSETCAR (tail, Qnil); - return; + pid_t deleted_pid; + if (INTEGERP (xpid)) + deleted_pid = XINT (xpid); + else + deleted_pid = XFLOAT_DATA (xpid); + if (process_status_retrieved (deleted_pid, pid, &w)) + { + XSETCAR (tail, Qnil); + if (record_at_most_one_child) + return; + } } } /* Otherwise, if it is asynchronous, it is in Vprocess_alist. */ - p = 0; for (tail = Vprocess_alist; CONSP (tail); tail = XCDR (tail)) { - proc = XCDR (XCAR (tail)); - p = XPROCESS (proc); - if (EQ (p->type, Qreal) && p->pid == pid) - break; - p = 0; - } - - /* Look for an asynchronous process whose pid hasn't been filled - in yet. */ - if (! p) - for (tail = Vprocess_alist; CONSP (tail); tail = XCDR (tail)) - { - proc = XCDR (XCAR (tail)); - p = XPROCESS (proc); - if (p->pid == -1) - break; - p = 0; - } + Lisp_Object proc = XCDR (XCAR (tail)); + struct Lisp_Process *p = XPROCESS (proc); + if (p->alive && process_status_retrieved (p->pid, pid, &w)) + { + /* Change the status of the process that was found. */ + p->tick = ++process_tick; + p->raw_status = w; + p->raw_status_new = 1; - /* Change the status of the process that was found. */ - if (p) - { - int clear_desc_flag = 0; + /* If process has terminated, stop waiting for its output. */ + if (WIFSIGNALED (w) || WIFEXITED (w)) + { + int clear_desc_flag = 0; + p->alive = 0; + if (p->infd >= 0) + clear_desc_flag = 1; - p->tick = ++process_tick; - p->raw_status = w; - p->raw_status_new = 1; + /* clear_desc_flag avoids a compiler bug in Microsoft C. */ + if (clear_desc_flag) + { + FD_CLR (p->infd, &input_wait_mask); + FD_CLR (p->infd, &non_keyboard_wait_mask); + } + } - /* If process has terminated, stop waiting for its output. */ - if ((WIFSIGNALED (w) || WIFEXITED (w)) - && p->infd >= 0) - clear_desc_flag = 1; + /* Tell wait_reading_process_output that it needs to wake up and + look around. 
*/ + if (input_available_clear_time) + *input_available_clear_time = make_emacs_time (0, 0); - /* We use clear_desc_flag to avoid a compiler bug in Microsoft C. */ - if (clear_desc_flag) - { - FD_CLR (p->infd, &input_wait_mask); - FD_CLR (p->infd, &non_keyboard_wait_mask); + if (record_at_most_one_child) + return; } - - /* Tell wait_reading_process_output that it needs to wake up and - look around. */ - if (input_available_clear_time) - *input_available_clear_time = make_emacs_time (0, 0); } - /* There was no asynchronous process found for that pid: we have - a synchronous process. */ - else + + if (0 <= pid) { + /* The caller successfully waited for a pid but no asynchronous + process was found for it, so this is a synchronous process. */ + synch_process_alive = 0; /* Report the status of the synchronous process. */ @@ -6392,38 +6405,10 @@ record_child_status_change (pid_t pid, int w) #ifdef SIGCHLD -/* On some systems, the SIGCHLD handler must return right away. If - any more processes want to signal us, we will get another signal. - Otherwise, loop around to use up all the processes that have - something to tell us. */ -#if (defined WINDOWSNT \ - || (defined USG && !defined GNU_LINUX \ - && !(defined HPUX && defined WNOHANG))) -enum { CAN_HANDLE_MULTIPLE_CHILDREN = 0 }; -#else -enum { CAN_HANDLE_MULTIPLE_CHILDREN = 1 }; -#endif - static void handle_child_signal (int sig) { - do - { - pid_t pid; - int status; - - do - pid = waitpid (-1, &status, WNOHANG | WUNTRACED); - while (pid < 0 && errno == EINTR); - - /* PID == 0 means no processes found, PID == -1 means a real failure. - Either way, we have done all our job. 
*/ - if (pid <= 0) - break; - - record_child_status_change (pid, status); - } - while (CAN_HANDLE_MULTIPLE_CHILDREN); + record_child_status_change (-1, 0); } static void diff --git a/src/process.h b/src/process.h index ce3d2e702cc..74d1a124060 100644 --- a/src/process.h +++ b/src/process.h @@ -142,6 +142,9 @@ struct Lisp_Process /* Flag to set coding-system of the process buffer from the coding_system used to decode process output. */ unsigned int inherit_coding_system_flag : 1; + /* Whether the process is alive, i.e., can be waited for. Running + processes can be waited for, but exited and fake processes cannot. */ + unsigned int alive : 1; /* Record the process status in the raw form in which it comes from `wait'. This is to avoid consing in a signal handler. The `raw_status_new' flag indicates that `raw_status' contains a new status that still diff --git a/src/sysdep.c b/src/sysdep.c index 63eac5d9e09..bb81353847b 100644 --- a/src/sysdep.c +++ b/src/sysdep.c @@ -289,10 +289,6 @@ wait_for_termination_1 (pid_t pid, int interruptible) { while (1) { -#ifdef WINDOWSNT - wait (0); - break; -#else /* not WINDOWSNT */ int status; int wait_result = waitpid (pid, &status, 0); if (wait_result < 0) @@ -306,7 +302,8 @@ wait_for_termination_1 (pid_t pid, int interruptible) break; } -#endif /* not WINDOWSNT */ + /* Note: the MS-Windows emulation of waitpid calls QUIT + internally. */ if (interruptible) QUIT; } diff --git a/src/w32proc.c b/src/w32proc.c index e3c54fe5460..b4f2099f06a 100644 --- a/src/w32proc.c +++ b/src/w32proc.c @@ -783,7 +783,6 @@ alarm (int seconds) /* Child process management list. 
*/ int child_proc_count = 0; child_process child_procs[ MAX_CHILDREN ]; -child_process *dead_child = NULL; static DWORD WINAPI reader_thread (void *arg); @@ -1036,9 +1035,6 @@ create_child (char *exe, char *cmdline, char *env, int is_gui_app, if (cp->pid < 0) cp->pid = -cp->pid; - /* pid must fit in a Lisp_Int */ - cp->pid = cp->pid & INTMASK; - *pPid = cp->pid; return TRUE; @@ -1114,55 +1110,110 @@ reap_subprocess (child_process *cp) delete_child (cp); } -/* Wait for any of our existing child processes to die - When it does, close its handle - Return the pid and fill in the status if non-NULL. */ +/* Wait for a child process specified by PID, or for any of our + existing child processes (if PID is nonpositive) to die. When it + does, close its handle. Return the pid of the process that died + and fill in STATUS if non-NULL. */ -int -sys_wait (int *status) +pid_t +waitpid (pid_t pid, int *status, int options) { DWORD active, retval; int nh; - int pid; child_process *cp, *cps[MAX_CHILDREN]; HANDLE wait_hnd[MAX_CHILDREN]; + DWORD timeout_ms; + int dont_wait = (options & WNOHANG) != 0; nh = 0; - if (dead_child != NULL) + /* According to Posix: + + PID = -1 means status is requested for any child process. + + PID > 0 means status is requested for a single child process + whose pid is PID. + + PID = 0 means status is requested for any child process whose + process group ID is equal to that of the calling process. But + since Windows has only a limited support for process groups (only + for console processes and only for the purposes of passing + Ctrl-BREAK signal to them), and since we have no documented way + of determining whether a given process belongs to our group, we + treat 0 as -1. + + PID < -1 means status is requested for any child process whose + process group ID is equal to the absolute value of PID. Again, + since we don't support process groups, we treat that as -1. 
*/ + if (pid > 0) { - /* We want to wait for a specific child */ - wait_hnd[nh] = dead_child->procinfo.hProcess; - cps[nh] = dead_child; - if (!wait_hnd[nh]) emacs_abort (); - nh++; - active = 0; - goto get_result; + int our_child = 0; + + /* We are requested to wait for a specific child. */ + for (cp = child_procs + (child_proc_count-1); cp >= child_procs; cp--) + { + /* Some child_procs might be sockets; ignore them. Also + ignore subprocesses whose output is not yet completely + read. */ + if (CHILD_ACTIVE (cp) + && cp->procinfo.hProcess + && cp->pid == pid) + { + our_child = 1; + break; + } + } + if (our_child) + { + if (cp->fd < 0 || (fd_info[cp->fd].flags & FILE_AT_EOF) != 0) + { + wait_hnd[nh] = cp->procinfo.hProcess; + cps[nh] = cp; + nh++; + } + else if (dont_wait) + { + /* PID specifies our subprocess, but its status is not + yet available. */ + return 0; + } + } + if (nh == 0) + { + /* No such child process, or nothing to wait for, so fail. */ + errno = ECHILD; + return -1; + } } else { for (cp = child_procs + (child_proc_count-1); cp >= child_procs; cp--) - /* some child_procs might be sockets; ignore them */ - if (CHILD_ACTIVE (cp) && cp->procinfo.hProcess - && (cp->fd < 0 || (fd_info[cp->fd].flags & FILE_AT_EOF) != 0)) - { - wait_hnd[nh] = cp->procinfo.hProcess; - cps[nh] = cp; - nh++; - } + { + if (CHILD_ACTIVE (cp) + && cp->procinfo.hProcess + && (cp->fd < 0 || (fd_info[cp->fd].flags & FILE_AT_EOF) != 0)) + { + wait_hnd[nh] = cp->procinfo.hProcess; + cps[nh] = cp; + nh++; + } + } + if (nh == 0) + { + /* Nothing to wait on, so fail. */ + errno = ECHILD; + return -1; + } } - if (nh == 0) - { - /* Nothing to wait on, so fail */ - errno = ECHILD; - return -1; - } + if (dont_wait) + timeout_ms = 0; + else + timeout_ms = 1000; /* check for quit about once a second. */ do { - /* Check for quit about once a second. 
*/ QUIT; - active = WaitForMultipleObjects (nh, wait_hnd, FALSE, 1000); + active = WaitForMultipleObjects (nh, wait_hnd, FALSE, timeout_ms); } while (active == WAIT_TIMEOUT); if (active == WAIT_FAILED) @@ -1192,8 +1243,10 @@ get_result: } if (retval == STILL_ACTIVE) { - /* Should never happen */ + /* Should never happen. */ DebPrint (("Wait.WaitForMultipleObjects returned an active process\n")); + if (pid > 0 && dont_wait) + return 0; errno = EINVAL; return -1; } @@ -1207,6 +1260,8 @@ get_result: else retval <<= 8; + if (pid > 0 && active != 0) + emacs_abort (); cp = cps[active]; pid = cp->pid; #ifdef FULL_DEBUG @@ -1995,9 +2050,7 @@ count_children: DebPrint (("select calling SIGCHLD handler for pid %d\n", cp->pid)); #endif - dead_child = cp; sig_handlers[SIGCHLD] (SIGCHLD); - dead_child = NULL; } } else if (fdindex[active] == -1)