ext/common/Watchdog.cpp in passenger-3.0.0 vs ext/common/Watchdog.cpp in passenger-3.0.1

- old
+ new

@@ -49,10 +49,11 @@ #include "Exceptions.h" #include "ResourceLocator.h" #include "Utils.h" #include "Utils/Base64.h" #include "Utils/Timer.h" +#include "Utils/ScopeGuard.h" #include "Utils/IOUtils.h" #include "Utils/VariantMap.h" using namespace std; using namespace boost; @@ -86,41 +87,10 @@ static EventFd *errorEvent; #define REQUEST_SOCKET_PASSWORD_SIZE 64 -class FailGuard { -private: - function<void ()> func; -public: - FailGuard() { } - FailGuard(const function<void ()> &_func): func(_func) { } - - ~FailGuard() { - if (func != NULL) { - func(); - } - } - - void runNow() { - if (func != NULL) { - function<void ()> func = this->func; - this->func = NULL; - func(); - } - } - - void set(const function<void ()> &func) { - this->func = func; - } - - void clear() { - func = NULL; - } -}; - - /** * Abstract base class for watching agent processes. */ class AgentWatcher { private: @@ -135,10 +105,11 @@ while (!this_thread::interruption_requested()) { lock.lock(); pid = this->pid; lock.unlock(); + // Process can be started before the watcher thread is launched. if (pid == 0) { pid = start(); } ret = syscalls::waitpid(pid, &status, 0); @@ -299,26 +270,23 @@ */ virtual pid_t start() { this_thread::disable_interruption di; this_thread::disable_syscall_interruption dsi; string exeFilename = getExeFilename(); - int fds[2], e, ret; + SocketPair fds; + int e, ret; pid_t pid; /* Create feedback fd for this agent process. We'll send some startup * arguments to this agent process through this fd, and we'll receive * startup information through it as well. */ - if (syscalls::socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1) { - int e = errno; - throw SystemException("Cannot create a Unix socket pair", e); - } + fds = createUnixSocketPair(); pid = syscalls::fork(); if (pid == 0) { // Child - long max_fds, i; /* Make sure file descriptor FEEDBACK_FD refers to the newly created * feedback fd (fds[1]) and close all other file descriptors. 
* In this child process we don't care about the original FEEDBACK_FD * (which is the Watchdog's communication channel to the agents starter.) @@ -343,15 +311,11 @@ fflush(stderr); _exit(1); } } - /* Close all file descriptors except 0-FEEDBACK_FD. */ - max_fds = sysconf(_SC_OPEN_MAX); - for (i = FEEDBACK_FD + 1; i < max_fds; i++) { - syscalls::close(i); - } + closeAllFileDescriptors(FEEDBACK_FD); /* Become the process group leader so that the watchdog can kill the * agent as well as all its descendant processes. */ setpgid(getpid(), getpid()); @@ -364,99 +328,112 @@ } e = errno; try { MessageChannel(FEEDBACK_FD).write("exec error", toString(e).c_str(), NULL); - _exit(1); } catch (...) { fprintf(stderr, "Passenger Watchdog: could not execute %s: %s (%d)\n", exeFilename.c_str(), strerror(e), e); fflush(stderr); - _exit(1); } + _exit(1); } else if (pid == -1) { // Error e = errno; - syscalls::close(fds[0]); - syscalls::close(fds[1]); throw SystemException("Cannot fork a new process", e); } else { // Parent - FileDescriptor feedbackFd(fds[0]); + FileDescriptor feedbackFd = fds[0]; vector<string> args; - syscalls::close(fds[1]); + fds[1].close(); this_thread::restore_interruption ri(di); this_thread::restore_syscall_interruption rsi(dsi); - FailGuard failGuard(boost::bind(killAndWait, pid)); + ScopeGuard failGuard(boost::bind(killAndWait, pid)); - // Send startup arguments. + /* Send startup arguments. Ignore EPIPE and ECONNRESET here + * because the child process might have sent a feedback message + * without reading startup arguments. 
+ */ try { sendStartupArguments(pid, feedbackFd); } catch (const SystemException &ex) { - throw SystemException(string("Unable to start the ") + name() + - ": an error occurred while sending startup arguments", - ex.code()); + if (ex.code() != EPIPE && ex.code() != ECONNRESET) { + throw SystemException(string("Unable to start the ") + name() + + ": an error occurred while sending startup arguments", + ex.code()); + } } // Now read its feedback. try { - if (!MessageChannel(feedbackFd).read(args)) { - throw EOFException(""); + ret = MessageChannel(feedbackFd).read(args); + } catch (const SystemException &e) { + if (e.code() == ECONNRESET) { + ret = false; + } else { + throw SystemException(string("Unable to start the ") + name() + + ": unable to read its startup information", + e.code()); } - } catch (const EOFException &e) { + } + if (!ret) { this_thread::disable_interruption di2; this_thread::disable_syscall_interruption dsi2; int status; - /* The feedback fd was closed for an unknown reason. + /* The feedback fd was prematurely closed for an unknown reason. * Did the agent process crash? * * We use timedWaitPid() here because if the process crashed * because of an uncaught exception, the file descriptor * might be closed before the process has printed an error * message, so we give it some time to print the error * before we kill it. */ - ret = timedWaitPid(pid, &status, 1000); + ret = timedWaitPid(pid, &status, 5000); if (ret == 0) { /* Doesn't look like it; it seems it's still running. * We can't do anything without proper feedback so kill * the agent process and throw an exception. */ failGuard.runNow(); throw RuntimeException(string("Unable to start the ") + name() + - ": an unknown error occurred during its startup"); + ": it froze and reported an unknown error during its startup"); } else if (ret != -1 && WIFSIGNALED(status)) { /* Looks like a crash which caused a signal. 
*/ throw RuntimeException(string("Unable to start the ") + name() + ": it seems to have been killed with signal " + getSignalName(WTERMSIG(status)) + " during startup"); - } else { + } else if (ret == -1) { /* Looks like it exited after detecting an error. */ throw RuntimeException(string("Unable to start the ") + name() + ": it seems to have crashed during startup for an unknown reason"); + } else { + /* Looks like it exited after detecting an error, but has an exit code. */ + throw RuntimeException(string("Unable to start the ") + name() + + ": it seems to have crashed during startup for an unknown reason, " + "with exit code " + toString(WEXITSTATUS(status))); } - } catch (const SystemException &e) { - throw SystemException(string("Unable to start the ") + name() + - ": unable to read its startup information", - e.code()); - } catch (const RuntimeException &) { - /* Rethrow without killing the PID because the process - * is already dead. - */ - failGuard.clear(); - throw; } if (args[0] == "system error before exec") { throw SystemException(string("Unable to start the ") + name() + ": " + args[1], atoi(args[2])); } else if (args[0] == "exec error") { - throw SystemException(string("Unable to start the ") + name() + - " because exec(\"" + getExeFilename() + "\") failed", - atoi(args[1])); + e = atoi(args[1]); + if (e == ENOENT) { + throw RuntimeException(string("Unable to start the ") + name() + + " because its executable (" + getExeFilename() + ") " + "doesn't exist. This probably means that your " + "Phusion Passenger installation is broken or " + "incomplete. 
Please reinstall Phusion Passenger"); + } else { + throw SystemException(string("Unable to start the ") + name() + + " because exec(\"" + getExeFilename() + "\") failed", + atoi(args[1])); + } } else if (!processStartupInfo(pid, feedbackFd, args)) { throw RuntimeException(string("The ") + name() + " sent an unknown startup info message '" + args[0] + "'"); } @@ -538,11 +515,11 @@ lock_guard<boost::mutex> l(lock); return threadExceptionBacktrace; } /** - * Returns the agent process feedback fd, or NULL if the agent process + * Returns the agent process feedback fd, or -1 if the agent process * hasn't been started yet. Can be used to check whether this agent process * has exited without using waitpid(). */ const FileDescriptor getFeedbackFd() const { lock_guard<boost::mutex> l(lock); @@ -679,17 +656,12 @@ // Fork a process which touches everything in the server instance dir. pid_t pid = syscalls::fork(); if (pid == 0) { // Child int prio, ret, e; - long max_fds, i; - // Close all unnecessary file descriptors. - max_fds = sysconf(_SC_OPEN_MAX); - for (i = 3; i < max_fds; i++) { - syscalls::close(i); - } + closeAllFileDescriptors(2); // Make process nicer. do { prio = getpriority(PRIO_PROCESS, getpid()); } while (prio == -1 && errno == EINTR); @@ -917,14 +889,9 @@ } } int main(int argc, char *argv[]) { - /* Become the session leader so that Apache can't kill this - * watchdog with killpg() during shutdown, and so that a - * Ctrl-C only affects the web server. - */ - setsid(); disableOomKiller(); agentsOptions = initializeAgent(argc, argv, "PassengerWatchdog"); logLevel = agentsOptions.getInt("log_level"); webServerPid = agentsOptions.getPid("web_server_pid");