ext/common/Watchdog.cpp in passenger-3.0.5 vs ext/common/Watchdog.cpp in passenger-3.0.6

- old
+ new

@@ -110,20 +110,34 @@
  // Process can be started before the watcher thread is launched.
  if (pid == 0) {
      pid = start();
  }
  ret = syscalls::waitpid(pid, &status, 0);
+ if (ret == -1 && errno == ECHILD) {
+     /* If the agent is attached to gdb then waitpid()
+      * here can return -1 with errno == ECHILD.
+      * Fallback to kill() polling for checking
+      * whether the agent is alive.
+      */
+     ret = pid;
+     status = 0;
+     P_WARN("waitpid() on " << name() << " return -1 with " <<
+         "errno = ECHILD, falling back to kill polling");
+     waitpidUsingKillPolling(pid);
+ }
  lock.lock();
  this->pid = 0;
  lock.unlock();
  this_thread::disable_interruption di;
  this_thread::disable_syscall_interruption dsi;
  if (ret == -1) {
+     int e = errno;
      P_WARN(name() << " crashed or killed for "
-         "an unknown reason, restarting it...");
+         "an unknown reason (errno = " <<
+         strerror(e) << "), restarting it...");
  } else if (WIFEXITED(status)) {
      if (WEXITSTATUS(status) == 0) {
          /* When the web server is gracefully exiting, it will
           * tell one or more agents to gracefully exit with exit
           * status 0. If we see this then it means the watchdog
@@ -240,9 +254,21 @@
          } else {
              syscalls::usleep(10000);
          }
      } while (timer.elapsed() < timeout);
      return 0; // timed out
+ }
+
+ static void waitpidUsingKillPolling(pid_t pid) {
+     bool done = false;
+
+     while (!done) {
+         int ret = syscalls::kill(pid, 0);
+         done = ret == -1;
+         if (!done) {
+             syscalls::usleep(20000);
+         }
+     }
  }
  public:
  AgentWatcher() {
      thr = NULL;