ext/common/Watchdog.cpp in passenger-3.0.5 vs ext/common/Watchdog.cpp in passenger-3.0.6
- old
+ new
@@ -110,20 +110,34 @@
// Process can be started before the watcher thread is launched.
if (pid == 0) {
pid = start();
}
ret = syscalls::waitpid(pid, &status, 0);
+ if (ret == -1 && errno == ECHILD) {
+ /* If the agent is attached to gdb then waitpid()
+ * here can return -1 with errno == ECHILD.
+ * Fall back to kill() polling for checking
+ * whether the agent is alive.
+ */
+ ret = pid;
+ status = 0;
+ P_WARN("waitpid() on " << name() << " return -1 with " <<
+ "errno = ECHILD, falling back to kill polling");
+ waitpidUsingKillPolling(pid);
+ }
lock.lock();
this->pid = 0;
lock.unlock();
this_thread::disable_interruption di;
this_thread::disable_syscall_interruption dsi;
if (ret == -1) {
+ int e = errno;
P_WARN(name() << " crashed or killed for "
- "an unknown reason, restarting it...");
+ "an unknown reason (errno = " <<
+ strerror(e) << "), restarting it...");
} else if (WIFEXITED(status)) {
if (WEXITSTATUS(status) == 0) {
/* When the web server is gracefully exiting, it will
* tell one or more agents to gracefully exit with exit
* status 0. If we see this then it means the watchdog
@@ -240,9 +254,21 @@
} else {
syscalls::usleep(10000);
}
} while (timer.elapsed() < timeout);
return 0; // timed out
+ }
+
+ /* Blocks until syscalls::kill(pid, 0) reports failure. Signal 0 is only a
+  * probe; any -1 return, whatever errno says, ends the wait. */
+ static void waitpidUsingKillPolling(pid_t pid) {
+     for (;;) {
+         if (syscalls::kill(pid, 0) == -1) {
+             return;
+         }
+         // Probe succeeded: pause via usleep(20000) before trying again.
+         syscalls::usleep(20000);
+     }
}
public:
AgentWatcher() {
thr = NULL;