ext/common/Watchdog.cpp in passenger-3.0.6 vs ext/common/Watchdog.cpp in passenger-3.0.7

- old (lines removed; present only in passenger-3.0.6)
+ new (lines added; present only in passenger-3.0.7)

@@ -45,10 +45,11 @@ #include "MessageChannel.h" #include "Constants.h" #include "RandomGenerator.h" #include "Logging.h" #include "Exceptions.h" +#include "StaticString.h" #include "ResourceLocator.h" #include "Utils.h" #include "Utils/Base64.h" #include "Utils/Timer.h" #include "Utils/ScopeGuard.h" @@ -77,20 +78,23 @@ static unsigned int maxPoolSize; static unsigned int maxInstancesPerApp; static unsigned int poolIdleTime; static string serializedPrestartURLs; +static string oldOomScore; static ServerInstanceDirPtr serverInstanceDir; static ServerInstanceDir::GenerationPtr generation; static string loggingAgentAddress; static string loggingAgentPassword; static RandomGenerator *randomGenerator; static EventFd *errorEvent; #define REQUEST_SOCKET_PASSWORD_SIZE 64 +static string setOomScore(const StaticString &score); + /** * Abstract base class for watching agent processes. */ class AgentWatcher { private: @@ -98,11 +102,11 @@ oxt::thread *thr; void threadMain() { try { pid_t pid, ret; - int status; + int status, e; while (!this_thread::interruption_requested()) { lock.lock(); pid = this->pid; lock.unlock(); @@ -121,20 +125,22 @@ ret = pid; status = 0; P_WARN("waitpid() on " << name() << " return -1 with " << "errno = ECHILD, falling back to kill polling"); waitpidUsingKillPolling(pid); + e = 0; + } else { + e = errno; } lock.lock(); this->pid = 0; lock.unlock(); this_thread::disable_interruption di; this_thread::disable_syscall_interruption dsi; if (ret == -1) { - int e = errno; P_WARN(name() << " crashed or killed for " "an unknown reason (errno = " << strerror(e) << "), restarting it..."); } else if (WIFEXITED(status)) { if (WEXITSTATUS(status) == 0) { @@ -343,10 +349,12 @@ /* Become the process group leader so that the watchdog can kill the * agent as well as all its descendant processes. */ setpgid(getpid(), getpid()); + setOomScore(oldOomScore); + try { execProgram(); } catch (...) 
{ fprintf(stderr, "PassengerWatchdog: execProgram() threw an exception\n"); fflush(stderr); @@ -713,10 +721,12 @@ strerror(e), e); fflush(stderr); _exit(1); } + setOomScore(oldOomScore); + execlp("/bin/sh", "/bin/sh", "-c", "find . | xargs touch", (char *) 0); e = errno; fprintf(stderr, "Cannot execute 'find . | xargs touch': %s (%d)\n", strerror(e), e); fflush(stderr); @@ -746,26 +756,48 @@ } }; /** - * Most operating systems overcommit memory. We *know* that this watchdog process - * doesn't use much memory; on OS X it uses about 200 KB of private RSS. If the - * watchdog is killed by the system Out-Of-Memory Killer or then it's all over: - * the system administrator will have to restart the web server for Phusion - * Passenger to be usable again. So in this function we do whatever is necessary - * to prevent this watchdog process from becoming a candidate for the OS's - * Out-Of-Memory Killer. + * Linux-only way to change OOM killer configuration for + * current process. Requires root privileges, which we + * should have. */ -static void -disableOomKiller() { - // Linux-only way to disable OOM killer for current process. Requires root - // privileges, which we should have. 
- FILE *f = fopen("/proc/self/oom_adj", "w"); - if (f != NULL) { - fprintf(f, "-17"); +static string +setOomScore(const StaticString &score) { + if (!score.empty()) { + string oldScore; + + FILE *f = fopen("/proc/self/oom_adj", "r"); + if (f == NULL) { + return ""; + } + char buf[1024]; + size_t bytesRead; + while (true) { + bytesRead = fread(buf, 1, sizeof(buf), f); + if (bytesRead == 0 && feof(f)) { + break; + } else if (bytesRead == 0 && ferror(f)) { + fclose(f); + return ""; + } else { + oldScore.append(buf, bytesRead); + } + } fclose(f); + + f = fopen("/proc/self/oom_adj", "w"); + if (f == NULL) { + return ""; + } + fwrite(score.data(), 1, score.size(), f); + fclose(f); + + return oldScore; + } else { + return ""; } } /** * Wait until the starter process has exited or sent us an exit command, @@ -915,10 +947,19 @@ } } int main(int argc, char *argv[]) { - disableOomKiller(); + /* + * Most operating systems overcommit memory. We *know* that this watchdog process + * doesn't use much memory; on OS X it uses about 200 KB of private RSS. If the + * watchdog is killed by the system Out-Of-Memory Killer or then it's all over: + * the system administrator will have to restart the web server for Phusion + * Passenger to be usable again. So here we disable Linux's OOM killer + * for this watchdog. Note that the OOM score is inherited by child processes + * so we need to restore it after each fork(). + */ + oldOomScore = setOomScore("-17"); agentsOptions = initializeAgent(argc, argv, "PassengerWatchdog"); logLevel = agentsOptions.getInt("log_level"); webServerPid = agentsOptions.getPid("web_server_pid"); tempDir = agentsOptions.get("temp_dir");