/* * Phusion Passenger - https://www.phusionpassenger.com/ * Copyright (c) 2010-2017 Phusion Holding B.V. * * "Passenger", "Phusion Passenger" and "Union Station" are registered * trademarks of Phusion Holding B.V. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #ifndef _GNU_SOURCE // Needed for IOV_MAX on Linux: // https://bugzilla.redhat.com/show_bug.cgi?id=165427 // Also needed for SO_PEERCRED. #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Also for __GLIBC__ macro. #include #include #include #include #include #ifdef __linux__ // For accept4 macros #include #include #endif #if defined(__APPLE__) #define HAVE_FPURGE #elif defined(__GLIBC__) #include #define HAVE___FPURGE #endif #include #include #include #include #include #include namespace Passenger { using namespace std; using namespace oxt; // Urgh, Solaris :-( #ifndef AF_LOCAL #define AF_LOCAL AF_UNIX #endif #ifndef PF_LOCAL #define PF_LOCAL PF_UNIX #endif static WritevFunction writevFunction = syscalls::writev; bool purgeStdio(FILE *f) { #if defined(HAVE_FPURGE) fpurge(f); return true; #elif defined(HAVE___FPURGE) __fpurge(f); return true; #else return false; #endif } ServerAddressType getSocketAddressType(const StaticString &address) { const char *data = address.c_str(); size_t len = address.size(); if (len > sizeof("unix:") - 1 && memcmp(data, "unix:", sizeof("unix:") - 1) == 0) { return SAT_UNIX; } else if (len > sizeof("tcp://") - 1 && memcmp(data, "tcp://", sizeof("tcp://") - 1) == 0) { return SAT_TCP; } else { return SAT_UNKNOWN; } } string parseUnixSocketAddress(const StaticString &address) { if (getSocketAddressType(address) != SAT_UNIX) { throw ArgumentException("Not a valid Unix socket address"); } return string(address.c_str() + sizeof("unix:") - 1, address.size() - sizeof("unix:") + 1); } void parseTcpSocketAddress(const StaticString &address, string &host, unsigned short &port) { if (getSocketAddressType(address) != SAT_TCP) { throw ArgumentException("Not a valid TCP socket address"); } StaticString hostAndPort(address.data() + sizeof("tcp://") - 1, address.size() - sizeof("tcp://") + 1); if (hostAndPort.empty()) { throw ArgumentException("Not a valid TCP socket address"); } if (hostAndPort[0] == '[') { // IPv6 address, e.g.: // [::1]:3000 const char *hostEnd = (const char *) memchr(hostAndPort.data(), ']', hostAndPort.size()); if (hostEnd == NULL || hostAndPort.size() <= string::size_type(hostEnd - hostAndPort.data()) + 3) { throw ArgumentException("Not a valid TCP socket address"); } const char *sep = hostEnd + 1; host.assign(hostAndPort.data() + 1, hostEnd - hostAndPort.data() - 1); port = stringToUint(StaticString( sep + 1, hostAndPort.data() + hostAndPort.size() - sep - 1 )); } else { // IPv4 address, e.g.: // 127.0.0.1:3000 const char *sep = (const char *) memchr(hostAndPort.data(), ':', hostAndPort.size()); if (sep == NULL || hostAndPort.size() <= string::size_type(sep - hostAndPort.data()) + 2) { throw ArgumentException("Not a valid TCP socket address"); } host.assign(hostAndPort.data(), sep - hostAndPort.data()); port = stringToUint(StaticString( sep + 1, hostAndPort.data() + hostAndPort.size() - sep - 1 )); } } bool isLocalSocketAddress(const StaticString &address) { switch (getSocketAddressType(address)) { case SAT_UNIX: return true; case SAT_TCP: { string host; unsigned short port; parseTcpSocketAddress(address, host, port); return host == "127.0.0.1" || host == "::1" || host == "localhost"; } default: throw ArgumentException("Unsupported socket address type"); } } void setBlocking(int fd) { int flags, ret; do { flags = fcntl(fd, F_GETFL); } while (flags == -1 && errno == EINTR); if (flags == -1) { int e = errno; throw SystemException("Cannot set socket to blocking mode: " "cannot get socket flags", e); } do { ret = fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); } while (ret == -1 && errno == EINTR); if (ret == -1) { int e = errno; throw SystemException("Cannot set socket to blocking mode: " "cannot set socket flags", e); } } void setNonBlocking(int fd) { int flags, ret; do { flags = fcntl(fd, F_GETFL); } while (flags == -1 && errno == EINTR); if (flags == -1) { int e = errno; throw SystemException("Cannot set socket to non-blocking mode: " "cannot get socket flags", e); } do { ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK); } while (ret == -1 && errno == EINTR); if (ret == -1) { int e = errno; throw SystemException("Cannot set socket to non-blocking mode: " "cannot set socket flags", e); } } int callAccept4(int sock, struct sockaddr *addr, socklen_t *addr_len, int options) { #if defined(HAVE_ACCEPT4) int ret; do { ret = ::accept4(sock, addr, addr_len, options); } while (ret == -1 && errno == EINTR); return ret; #elif defined(__linux__) && defined(__x86_64__) int ret; do { ret = syscall(288, sock, addr, addr_len, options); } while (ret == -1 && errno == EINTR); return ret; #else errno = ENOSYS; return -1; #endif } vector resolveHostname(const string &hostname, unsigned int port, bool shuffle) { string portString = toString(port); struct addrinfo hints, *res, *current; vector result; int ret; memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; ret = getaddrinfo(hostname.c_str(), (port == 0) ? NULL : portString.c_str(), &hints, &res); if (ret != 0) { throw IOException(string("Error resolving ") + hostname + ": " + gai_strerror(ret)); } for (current = res; current != NULL; current = current->ai_next) { char host[NI_MAXHOST]; ret = getnameinfo(current->ai_addr, current->ai_addrlen, host, sizeof(host) - 1, NULL, 0, NI_NUMERICHOST); if (ret == 0) { result.push_back(host); } else { P_WARN("Cannot get name info for one of the resolved " "IP addresses in host name " << hostname); } } freeaddrinfo(res); if (shuffle) { random_shuffle(result.begin(), result.end()); } return result; } int createServer(const StaticString &address, unsigned int backlogSize, bool autoDelete, const char *file, unsigned int line) { TRACE_POINT(); switch (getSocketAddressType(address)) { case SAT_UNIX: return createUnixServer(parseUnixSocketAddress(address), backlogSize, autoDelete, file, line); case SAT_TCP: { string host; unsigned short port; parseTcpSocketAddress(address, host, port); return createTcpServer(host.c_str(), port, backlogSize, file, line); } default: throw ArgumentException(string("Unknown address type for '") + address + "'"); } } int createUnixServer(const StaticString &filename, unsigned int backlogSize, bool autoDelete, const char *file, unsigned int line) { struct sockaddr_un addr; int fd, ret; if (filename.size() > sizeof(addr.sun_path) - 1) { string message = "Cannot create Unix socket '"; message.append(filename.toString()); message.append("': filename is too long."); throw RuntimeException(message); } fd = syscalls::socket(PF_LOCAL, SOCK_STREAM, 0); if (fd == -1) { int e = errno; throw SystemException("Cannot create a Unix socket file descriptor", e); } FdGuard guard(fd, file, line, true); addr.sun_family = AF_LOCAL; strncpy(addr.sun_path, filename.c_str(), filename.size()); addr.sun_path[filename.size()] = '\0'; if (autoDelete) { do { ret = unlink(filename.c_str()); } while (ret == -1 && errno == EINTR); } ret = syscalls::bind(fd, (const struct sockaddr *) &addr, sizeof(addr)); if (ret == -1) { int e = errno; string message = "Cannot bind Unix socket '"; message.append(filename.toString()); message.append("'"); throw SystemException(message, e); } if (backlogSize == 0) { backlogSize = 1024; } ret = syscalls::listen(fd, backlogSize); if (ret == -1) { int e = errno; string message = "Cannot listen on Unix socket '"; message.append(filename.toString()); message.append("'"); safelyClose(fd, true); throw SystemException(message, e); } guard.clear(); return fd; } int createTcpServer(const char *address, unsigned short port, unsigned int backlogSize, const char *file, unsigned int line) { union { struct sockaddr_in v4; struct sockaddr_in6 v6; } addr; sa_family_t family; int fd, ret, optval; memset(&addr, 0, sizeof(addr)); family = addr.v4.sin_family = AF_INET; ret = inet_pton(AF_INET, address, &addr.v4.sin_addr.s_addr); if (ret == 0) { // Might be an IPv6 address. memset(&addr, 0, sizeof(addr)); family = addr.v6.sin6_family = AF_INET6; ret = inet_pton(AF_INET6, address, &addr.v6.sin6_addr.s6_addr); } if (ret < 0) { int e = errno; string message = "Cannot parse the IP address '"; message.append(address); message.append("'"); throw SystemException(message, e); } else if (ret == 0) { string message = "Cannot parse the IP address '"; message.append(address); message.append("'"); throw ArgumentException(message); } if (family == AF_INET) { addr.v4.sin_port = htons(port); fd = syscalls::socket(PF_INET, SOCK_STREAM, 0); } else { addr.v6.sin6_port = htons(port); fd = syscalls::socket(PF_INET6, SOCK_STREAM, 0); } if (fd == -1) { int e = errno; throw SystemException("Cannot create a TCP socket file descriptor", e); } optval = 1; if (syscalls::setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) { int e = errno; fprintf(stderr, "so_reuseaddr failed: %s\n", strerror(e)); } // Ignore SO_REUSEADDR error, it's not fatal. FdGuard guard(fd, file, line, true); if (family == AF_INET) { ret = syscalls::bind(fd, (const struct sockaddr *) &addr.v4, sizeof(struct sockaddr_in)); } else { ret = syscalls::bind(fd, (const struct sockaddr *) &addr.v6, sizeof(struct sockaddr_in6)); } if (ret == -1) { int e = errno; string message = "Cannot bind a TCP socket on address '"; message.append(address); message.append("' port "); message.append(toString(port)); throw SystemException(message, e); } if (backlogSize == 0) { backlogSize = DEFAULT_SOCKET_BACKLOG; } ret = syscalls::listen(fd, backlogSize); if (ret == -1) { int e = errno; string message = "Cannot listen on TCP socket '"; message.append(address); message.append("' port "); message.append(toString(port)); throw SystemException(message, e); } guard.clear(); return fd; } int connectToServer(const StaticString &address, const char *file, unsigned int line) { TRACE_POINT(); switch (getSocketAddressType(address)) { case SAT_UNIX: return connectToUnixServer(parseUnixSocketAddress(address), file, line); case SAT_TCP: { string host; unsigned short port; parseTcpSocketAddress(address, host, port); return connectToTcpServer(host, port, file, line); } default: throw ArgumentException(string("Unknown address type for '") + address + "'"); } } int connectToUnixServer(const StaticString &filename, const char *file, unsigned int line) { int fd = syscalls::socket(PF_UNIX, SOCK_STREAM, 0); if (fd == -1) { int e = errno; throw SystemException("Cannot create a Unix socket file descriptor", e); } FdGuard guard(fd, file, line, true); int ret; struct sockaddr_un addr; if (filename.size() > sizeof(addr.sun_path) - 1) { string message = "Cannot connect to Unix socket '"; message.append(filename.data(), filename.size()); message.append("': filename is too long."); throw RuntimeException(message); } addr.sun_family = AF_UNIX; memcpy(addr.sun_path, filename.c_str(), filename.size()); addr.sun_path[filename.size()] = '\0'; bool retry = true; int counter = 0; while (retry) { ret = syscalls::connect(fd, (const sockaddr *) &addr, sizeof(addr)); if (ret == -1) { #if defined(sun) || defined(__sun) /* Solaris has this nice kernel bug where connecting to * a newly created Unix socket which is obviously * connectable can cause an ECONNREFUSED. So we retry * in a loop. */ retry = errno == ECONNREFUSED; #else retry = false; #endif retry = retry && counter < 9; if (retry) { syscalls::usleep((useconds_t) (10000 * pow((double) 2, (double) counter))); counter++; } else { int e = errno; string message("Cannot connect to Unix socket '"); message.append(filename.toString()); message.append("'"); throw SystemException(message, e); } } else { guard.clear(); return fd; } } abort(); // Never reached. return -1; // Shut up compiler warning. } void setupNonBlockingUnixSocket(NUnix_State &state, const StaticString &filename, const char *file, unsigned int line) { state.fd.assign(syscalls::socket(PF_UNIX, SOCK_STREAM, 0), file, line); if (state.fd == -1) { int e = errno; throw SystemException("Cannot create a Unix socket file descriptor", e); } state.filename = filename; setNonBlocking(state.fd); } bool connectToUnixServer(NUnix_State &state) { struct sockaddr_un addr; int ret; if (state.filename.size() > sizeof(addr.sun_path) - 1) { string message = "Cannot connect to Unix socket '"; message.append(state.filename.data(), state.filename.size()); message.append("': filename is too long."); throw RuntimeException(message); } addr.sun_family = AF_UNIX; memcpy(addr.sun_path, state.filename.data(), state.filename.size()); addr.sun_path[state.filename.size()] = '\0'; ret = syscalls::connect(state.fd, (const sockaddr *) &addr, sizeof(addr)); if (ret == -1) { if (errno == EINPROGRESS || errno == EWOULDBLOCK) { return false; } else if (errno == EISCONN) { return true; } else { int e = errno; string message = "Cannot connect to Unix socket '"; message.append(state.filename.data(), state.filename.size()); throw SystemException(message, e); } } else { return true; } } int connectToTcpServer(const StaticString &hostname, unsigned int port, const char *file, unsigned int line) { struct addrinfo hints, *res; int ret, e, fd; memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; ret = getaddrinfo(hostname.c_str(), toString(port).c_str(), &hints, &res); if (ret != 0) { string message = "Cannot resolve IP address '"; message.append(hostname.toString()); message.append(":"); message.append(toString(port)); message.append("': "); message.append(gai_strerror(ret)); throw IOException(message); } try { fd = syscalls::socket(PF_INET, SOCK_STREAM, 0); } catch (...) { freeaddrinfo(res); throw; } if (fd == -1) { e = errno; freeaddrinfo(res); throw SystemException("Cannot create a TCP socket file descriptor", e); } try { ret = syscalls::connect(fd, res->ai_addr, res->ai_addrlen); } catch (...) { freeaddrinfo(res); safelyClose(fd, true); throw; } e = errno; freeaddrinfo(res); if (ret == -1) { string message = "Cannot connect to TCP socket '"; message.append(hostname.toString()); message.append(":"); message.append(toString(port)); message.append("'"); safelyClose(fd, true); throw SystemException(message, e); } P_LOG_FILE_DESCRIPTOR_OPEN3(fd, file, line); return fd; } void setupNonBlockingTcpSocket(NTCP_State &state, const StaticString &hostname, int port, const char *file, unsigned int line) { int ret; memset(&state.hints, 0, sizeof(state.hints)); state.hints.ai_family = PF_UNSPEC; state.hints.ai_socktype = SOCK_STREAM; ret = getaddrinfo(hostname.toString().c_str(), toString(port).c_str(), &state.hints, &state.res); if (ret != 0) { string message = "Cannot resolve IP address '"; message.append(hostname.data(), hostname.size()); message.append(":"); message.append(toString(port)); message.append("': "); message.append(gai_strerror(ret)); throw IOException(message); } state.fd.assign(syscalls::socket(PF_INET, SOCK_STREAM, 0), file, line); if (state.fd == -1) { int e = errno; throw SystemException("Cannot create a TCP socket file descriptor", e); } state.hostname = hostname; state.port = port; setNonBlocking(state.fd); } bool connectToTcpServer(NTCP_State &state) { int ret; ret = syscalls::connect(state.fd, state.res->ai_addr, state.res->ai_addrlen); if (ret == -1) { if (errno == EINPROGRESS || errno == EWOULDBLOCK) { return false; } else if (errno == EISCONN) { freeaddrinfo(state.res); state.res = NULL; return true; } else { int e = errno; string message = "Cannot connect to TCP socket '"; message.append(state.hostname); message.append(":"); message.append(toString(state.port)); message.append("'"); throw SystemException(message, e); } } else { freeaddrinfo(state.res); state.res = NULL; return true; } } void setupNonBlockingSocket(NConnect_State &state, const StaticString &address, const char *file, unsigned int line) { TRACE_POINT(); state.type = getSocketAddressType(address); switch (state.type) { case SAT_UNIX: setupNonBlockingUnixSocket(state.s_unix, parseUnixSocketAddress(address), file, line); break; case SAT_TCP: { string host; unsigned short port; parseTcpSocketAddress(address, host, port); setupNonBlockingTcpSocket(state.s_tcp, host, port, file, line); break; } default: throw ArgumentException(string("Unknown address type for '") + address + "'"); } } bool connectToServer(NConnect_State &state) { switch (state.type) { case SAT_UNIX: return connectToUnixServer(state.s_unix); case SAT_TCP: return connectToTcpServer(state.s_tcp); default: throw RuntimeException("Unknown address type"); } } bool pingTcpServer(const StaticString &host, unsigned int port, unsigned long long *timeout) { TRACE_POINT(); NTCP_State state; setupNonBlockingTcpSocket(state, host, port, __FILE__, __LINE__); try { if (connectToTcpServer(state)) { return true; } } catch (const SystemException &e) { if (e.code() == ECONNREFUSED) { return false; } else { throw e; } } // Cannot connect to the port yet, but that may not mean the // port is unavailable. So poll the socket. bool connectable; try { connectable = waitUntilWritable(state.fd, timeout); } catch (const SystemException &e) { throw SystemException("Error polling TCP socket " + host + ":" + toString(port), e.code()); } if (!connectable) { // Timed out. Assume port is not available. return false; } // Try to connect the socket one last time. try { return connectToTcpServer(state); } catch (const SystemException &e) { if (e.code() == ECONNREFUSED) { return false; } else if (e.code() == EISCONN || e.code() == EINVAL) { #ifdef __FreeBSD__ // Work around bug in FreeBSD (discovered on // January 20 2013 in daemon_controller) return false; #else throw e; #endif // Never reached, shut up compiler warning. return false; } else { throw e; } } } SocketPair createUnixSocketPair(const char *file, unsigned int line) { int fds[2]; FileDescriptor sockets[2]; if (syscalls::socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1) { int e = errno; throw SystemException("Cannot create a Unix socket pair", e); } else { sockets[0].assign(fds[0], file, line); sockets[1].assign(fds[1], file, line); return SocketPair(sockets[0], sockets[1]); } } Pipe createPipe(const char *file, unsigned int line) { int fds[2]; FileDescriptor p[2]; if (syscalls::pipe(fds) == -1) { int e = errno; throw SystemException("Cannot create a pipe", e); } else { p[0].assign(fds[0], file, line); p[1].assign(fds[1], file, line); return Pipe(p[0], p[1]); } } static bool waitUntilIOEvent(int fd, short event, unsigned long long *timeout) { struct pollfd pfd; int ret; pfd.fd = fd; pfd.events = event; pfd.revents = 0; Timer<> timer; ret = syscalls::poll(&pfd, 1, *timeout / 1000); if (ret == -1) { int e = errno; throw SystemException("poll() failed", e); } else { unsigned long long elapsed = timer.usecElapsed(); if (elapsed > *timeout) { *timeout = 0; } else { *timeout -= elapsed; } return ret != 0; } } bool waitUntilReadable(int fd, unsigned long long *timeout) { return waitUntilIOEvent(fd, POLLIN, timeout); } bool waitUntilWritable(int fd, unsigned long long *timeout) { return waitUntilIOEvent(fd, POLLOUT | POLLHUP, timeout); } unsigned int readExact(int fd, void *buf, unsigned int size, unsigned long long *timeout) { ssize_t ret; unsigned int alreadyRead = 0; while (alreadyRead < size) { if (timeout != NULL && !waitUntilReadable(fd, timeout)) { throw TimeoutException("Cannot read enough data within the specified timeout"); } ret = syscalls::read(fd, (char *) buf + alreadyRead, size - alreadyRead); if (ret == -1) { int e = errno; throw SystemException("read() failed", e); } else if (ret == 0) { return alreadyRead; } else { alreadyRead += ret; } } return alreadyRead; } void writeExact(int fd, const void *data, unsigned int size, unsigned long long *timeout) { ssize_t ret; unsigned int written = 0; while (written < size) { if (timeout != NULL && !waitUntilWritable(fd, timeout)) { throw TimeoutException("Cannot write enough data within the specified timeout"); } ret = syscalls::write(fd, (const char *) data + written, size - written); if (ret == -1) { int e = errno; throw SystemException("write() failed", e); } else { written += ret; } } } void writeExact(int fd, const StaticString &data, unsigned long long *timeout) { const char * restrict data_ptr = data.data(); writeExact(fd, data_ptr, data.size(), timeout); } /** * Converts an array of StaticStrings to a corresponding array of iovec structures, * returning the size sum in bytes of all StaticStrings. */ static size_t staticStringArrayToIoVec(const StaticString ary[], size_t count, struct iovec *vec, size_t &vecCount) { size_t total = 0; size_t i; for (i = 0, vecCount = 0; i < count; i++) { /* No idea whether all writev() implementations support iov_len == 0, * but I'd rather not risk finding out. */ if (ary[i].size() > 0) { /* I know writev() doesn't write to iov_base, but on some * platforms it's still defined as non-const char * * :-( */ vec[vecCount].iov_base = (char *) ary[i].data(); vec[vecCount].iov_len = ary[i].size(); total += ary[i].size(); vecCount++; } } return total; } /** * Suppose that the given IO vectors are placed adjacent to each other * in a single contiguous block of memory. Given a position inside this * block of memory, this function will calculate the index in the IO vector * array and the offset inside that IO vector that corresponds with * the position. * * For example, given the following array of IO vectors: * { "AAA", "BBBB", "CC" } * Position 0 would correspond to the first item, offset 0. * Position 1 would correspond to the first item, offset 1. * Position 5 would correspond to the second item, offset 2. * And so forth. * * If the position is outside the bounds of the array, then index will be * set to count + 1 and offset to 0. */ static void findDataPositionIndexAndOffset(struct iovec data[], size_t count, size_t position, size_t * restrict index, size_t * restrict offset) { size_t i; size_t begin = 0; for (i = 0; i < count; i++) { size_t end = begin + data[i].iov_len; if (OXT_LIKELY(begin <= position)) { if (position < end) { *index = i; *offset = position - begin; return; } else { begin = end; } } else { // Never reached. abort(); } } *index = count; *offset = 0; } static ssize_t realGatheredWrite(int fd, const StaticString *data, unsigned int dataCount, string &restBuffer, struct iovec *iov) { size_t totalSize, iovCount, i; ssize_t ret; if (restBuffer.empty()) { totalSize = staticStringArrayToIoVec(data, dataCount, iov, iovCount); if (totalSize == 0) { errno = 0; return 0; } ret = writevFunction(fd, iov, std::min(iovCount, (size_t) IOV_MAX)); if (ret == -1) { if (errno == EAGAIN || errno == EWOULDBLOCK) { // Nothing could be written without blocking, so put // everything in the rest buffer. int e = errno; restBuffer.reserve(totalSize); for (i = 0; i < iovCount; i++) { restBuffer.append((const char *) iov[i].iov_base, iov[i].iov_len); } errno = e; return 0; } else { return -1; } } else if ((size_t) ret < totalSize) { size_t index, offset; // Put all unsent data in the rest buffer. restBuffer.reserve(ret); findDataPositionIndexAndOffset(iov, iovCount, ret, &index, &offset); for (i = index; i < iovCount; i++) { if (i == index) { restBuffer.append( ((const char *) iov[i].iov_base) + offset, iov[i].iov_len - offset); } else { restBuffer.append( (const char *) iov[i].iov_base, iov[i].iov_len); } } // TODO: we should call writev() again if iovCount > iovMax // in order to send out the rest of the data without // putting them in the rest buffer. return ret; } else { // Everything is sent, and the rest buffer was empty anyway, so // just return. return totalSize; } } else { iov[0].iov_base = (char *) restBuffer.data(); iov[0].iov_len = restBuffer.size(); totalSize = staticStringArrayToIoVec(data, dataCount, iov + 1, iovCount); totalSize += restBuffer.size(); iovCount++; ret = writevFunction(fd, iov, std::min(iovCount, (size_t) IOV_MAX)); if (ret == -1) { if (errno == EAGAIN || errno == EWOULDBLOCK) { // Nothing could be written without blocking, so // append all data into the rest buffer. int e = errno; restBuffer.reserve(totalSize); for (i = 1; i < iovCount; i++) { restBuffer.append( (const char *) iov[i].iov_base, iov[i].iov_len); } errno = e; return 0; } else { return -1; } } else { string::size_type restBufferSize = restBuffer.size(); size_t restBufferSent = std::min((size_t) ret, (size_t) restBufferSize); // Remove everything in the rest buffer that we've been able to send. restBuffer.erase(0, restBufferSent); if (restBuffer.empty()) { size_t index, offset; // Looks like everything in the rest buffer was sent. // Put all unsent data into the rest buffer. findDataPositionIndexAndOffset(iov, iovCount, ret, &index, &offset); for (i = index; i < iovCount; i++) { if (i == index) { restBuffer.append( ((const char *) iov[i].iov_base) + offset, iov[i].iov_len - offset); } else { restBuffer.append( (const char *) iov[i].iov_base, iov[i].iov_len); } } // TODO: we should call writev() again if // iovCount > iovMax && ret < totalSize // in order to send out the rest of the data without // putting them in the rest buffer. } else { // The rest buffer could only be partially sent out, so // nothing in 'data' could be sent. Append everything // in 'data' into the rest buffer. restBuffer.reserve(totalSize - ret); for (i = 1; i < iovCount; i++) { restBuffer.append( (const char *) iov[i].iov_base, iov[i].iov_len); } } return ret; } } } ssize_t gatheredWrite(int fd, const StaticString *data, unsigned int dataCount, string &restBuffer) { if (dataCount < 8) { struct iovec iov[8]; return realGatheredWrite(fd, data, dataCount, restBuffer, iov); } else { vector iov; iov.reserve(dataCount + 1); return realGatheredWrite(fd, data, dataCount, restBuffer, &iov[0]); } } static size_t eraseBeginningOfIoVec(struct iovec *iov, size_t count, size_t index, size_t offset) { size_t i, newCount; for (i = index, newCount = 0; i < count; i++, newCount++) { if (newCount == 0) { iov[newCount].iov_base = (char *) iov[i].iov_base + offset; iov[newCount].iov_len = iov[i].iov_len - offset; } else { iov[newCount].iov_base = iov[i].iov_base; iov[newCount].iov_len = iov[i].iov_len; } } return newCount; } static void realGatheredWrite(int fd, const StaticString *data, unsigned int count, unsigned long long *timeout, struct iovec *iov) { size_t total, iovCount; size_t written = 0; total = staticStringArrayToIoVec(data, count, iov, iovCount); while (written < total) { if (timeout != NULL && !waitUntilWritable(fd, timeout)) { throw TimeoutException("Cannot write enough data within the specified timeout"); } ssize_t ret = writevFunction(fd, iov, std::min(iovCount, (size_t) IOV_MAX)); if (ret == -1) { int e = errno; throw SystemException("Unable to write all data", e); } else { size_t index, offset; written += ret; findDataPositionIndexAndOffset(iov, iovCount, ret, &index, &offset); iovCount = eraseBeginningOfIoVec(iov, iovCount, index, offset); } } assert(written == total); } void gatheredWrite(int fd, const StaticString *data, unsigned int count, unsigned long long *timeout) { if (count <= 8) { struct iovec iov[8]; realGatheredWrite(fd, data, count, timeout, iov); } else { vector iov; iov.reserve(count); realGatheredWrite(fd, data, count, timeout, &iov[0]); } } void setWritevFunction(WritevFunction func) { if (func != NULL) { writevFunction = func; } else { writevFunction = syscalls::writev; } } int readFileDescriptor(int fd, unsigned long long *timeout) { if (timeout != NULL && !waitUntilReadable(fd, timeout)) { throw TimeoutException("Cannot receive file descriptor within the specified timeout"); } struct msghdr msg; struct iovec vec; char dummy[1]; #if defined(__APPLE__) || defined(__SOLARIS__) || defined(__arm__) // File descriptor passing macros (CMSG_*) seem to be broken // on 64-bit MacOS X. This structure works around the problem. struct { struct cmsghdr header; int fd; } control_data; #define EXPECTED_CMSG_LEN sizeof(control_data) #else char control_data[CMSG_SPACE(sizeof(int))]; #define EXPECTED_CMSG_LEN CMSG_LEN(sizeof(int)) #endif struct cmsghdr *control_header; int ret; msg.msg_name = NULL; msg.msg_namelen = 0; dummy[0] = '\0'; vec.iov_base = dummy; vec.iov_len = sizeof(dummy); msg.msg_iov = &vec; msg.msg_iovlen = 1; msg.msg_control = (caddr_t) &control_data; msg.msg_controllen = sizeof(control_data); msg.msg_flags = 0; ret = syscalls::recvmsg(fd, &msg, 0); if (ret == -1) { throw SystemException("Cannot read file descriptor with recvmsg()", errno); } control_header = CMSG_FIRSTHDR(&msg); if (control_header == NULL) { throw IOException("No valid file descriptor received."); } if (control_header->cmsg_len != EXPECTED_CMSG_LEN || control_header->cmsg_level != SOL_SOCKET || control_header->cmsg_type != SCM_RIGHTS) { throw IOException("No valid file descriptor received."); } #if defined(__APPLE__) || defined(__SOLARIS__) || defined(__arm__) return control_data.fd; #else return *((int *) CMSG_DATA(control_header)); #endif } void writeFileDescriptor(int fd, int fdToSend, unsigned long long *timeout) { if (timeout != NULL && !waitUntilWritable(fd, timeout)) { throw TimeoutException("Cannot send file descriptor within the specified timeout"); } struct msghdr msg; struct iovec vec; char dummy[1]; #if defined(__APPLE__) || defined(__SOLARIS__) || defined(__arm__) struct { struct cmsghdr header; int fd; } control_data; #else char control_data[CMSG_SPACE(sizeof(int))]; #endif struct cmsghdr *control_header; int ret; memset(&msg, 0, sizeof(msg)); memset(&control_data, 0, sizeof(control_data)); msg.msg_name = NULL; msg.msg_namelen = 0; /* Linux and Solaris require msg_iov to be non-NULL. */ dummy[0] = '\0'; vec.iov_base = dummy; vec.iov_len = sizeof(dummy); msg.msg_iov = &vec; msg.msg_iovlen = 1; msg.msg_control = (caddr_t) &control_data; msg.msg_controllen = sizeof(control_data); msg.msg_flags = 0; control_header = CMSG_FIRSTHDR(&msg); control_header->cmsg_level = SOL_SOCKET; control_header->cmsg_type = SCM_RIGHTS; #if defined(__APPLE__) || defined(__SOLARIS__) || defined(__arm__) control_header->cmsg_len = sizeof(control_data); control_data.fd = fdToSend; #else control_header->cmsg_len = CMSG_LEN(sizeof(int)); memcpy(CMSG_DATA(control_header), &fdToSend, sizeof(int)); #endif ret = syscalls::sendmsg(fd, &msg, 0); if (ret == -1) { throw SystemException("Cannot send file descriptor with sendmsg()", errno); } } void readPeerCredentials(int sock, uid_t *uid, gid_t *gid) { union { struct sockaddr genericAddress; struct sockaddr_un unixAddress; struct sockaddr_in inetAddress; struct sockaddr_in6 inetAddress6; } addr; socklen_t len = sizeof(addr); int ret; /* * The functions for receiving the peer credentials are not guaranteed to * fail if the socket is not a Unix domain socket. For example, OS X getpeereid() * just returns garbage when invoked on a TCP socket. So we check here * whether 'sock' is a Unix domain socket. */ do { ret = getsockname(sock, &addr.genericAddress, &len); } while (ret == -1 && errno == EINTR); if (ret == -1) { int e = errno; throw SystemException("Unable to autodetect socket type (getsockname() failed)", e); } if (addr.genericAddress.sa_family != AF_LOCAL) { throw SystemException("Cannot receive process credentials: the connection is not a Unix domain socket", EPROTONOSUPPORT); } #if defined(__linux__) struct ucred credentials; socklen_t ucred_length = sizeof(struct ucred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &credentials, &ucred_length) != 0) { int e = errno; throw SystemException("Cannot receive process credentials over Unix domain socket", e); } *uid = credentials.uid; *gid = credentials.gid; #elif defined(__FreeBSD__) || defined(__APPLE__) if (getpeereid(sock, uid, gid) == -1) { int e = errno; throw SystemException("Cannot receive process credentials over Unix domain socket", e); } #else throw SystemException("Cannot receive process credentials over Unix domain socket", ENOSYS); #endif } void safelyClose(int fd, bool ignoreErrors) { if (syscalls::close(fd) == -1) { /* FreeBSD has a kernel bug which can cause close() to return ENOTCONN. * This is harmless, ignore it. We check for this problem on all * platforms because some OSes might borrow Unix domain socket * code from FreeBSD. * http://www.freebsd.org/cgi/query-pr.cgi?pr=79138 * http://www.freebsd.org/cgi/query-pr.cgi?pr=144061 */ if (errno != ENOTCONN && !ignoreErrors) { int e = errno; throw SystemException("Cannot close file descriptor", e); } } } string readAll(const string &filename) { FILE *f = fopen(filename.c_str(), "rb"); if (f != NULL) { StdioGuard guard(f, NULL, 0); return readAll(fileno(f)); } else { int e = errno; throw FileSystemException("Cannot open '" + filename + "' for reading", e, filename); } } string readAll(int fd) { string result; char buf[1024 * 32]; ssize_t ret; while (true) { do { ret = read(fd, buf, sizeof(buf)); } while (ret == -1 && errno == EINTR); if (ret == 0) { break; } else if (ret == -1) { if (errno == ECONNRESET) { break; } else { int e = errno; throw SystemException("Cannot read from file descriptor", e); } } else { result.append(buf, ret); } } return result; } } // namespace Passenger