// Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
package com.google.protobuf; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; /** * Immutable sequence of bytes. Substring is supported by sharing the reference * to the immutable underlying bytes, as with {@link String}. Concatenation is * likewise supported without copying (long strings) by building a tree of * pieces in {@link RopeByteString}. *
* Like {@link String}, the contents of a {@link ByteString} can never be
* observed to change, not even in the presence of a data race or incorrect
* API usage in the client code.
*
* @author crazybob@google.com Bob Lee
* @author kenton@google.com Kenton Varda
* @author carlanton@google.com Carl Haverl
* @author martinrb@google.com Martin Buchholz
*/
public abstract class ByteString implements Iterable The returned {@code ByteString} is not necessarily a unique object.
* If the list is empty, the returned object is the singleton empty
* {@code ByteString}. If the list has only one element, that
* {@code ByteString} will be returned without copying.
*
* @param byteStrings strings to be concatenated
* @return new {@code ByteString}
*/
public static ByteString copyFrom(Iterable
* By returning a list, implementations of this method may be able to avoid
* copying even when there are multiple backing arrays.
*
* @return a list of wrapped bytes
*/
public abstract List More precisely, returns {@code true} whenever: This method returns {@code false} for "overlong" byte sequences,
* as well as for 3-byte sequences that would map to a surrogate
* character, in accordance with the restricted definition of UTF-8
* introduced in Unicode 3.1. Note that the UTF-8 decoder included in
* Oracle's JDK has been modified to also reject "overlong" byte
* sequences, but (as of 2011) still accepts 3-byte surrogate
* character byte sequences.
*
* See the Unicode Standard,
* Table 3-6. UTF-8 Bit Distribution,
* Table 3-7. Well Formed UTF-8 Byte Sequences.
*
* @return whether the bytes in this {@code ByteString} are a
* well-formed UTF-8 byte sequence
*/
public abstract boolean isValidUtf8();
/**
 * Tells whether the given byte sequence is a well-formed, malformed, or
 * incomplete UTF-8 byte sequence. This method accepts and returns a partial
 * state result, allowing the bytes for a complete UTF-8 byte sequence to be
 * composed from multiple {@code ByteString} segments.
 *
 * <p>The method is abstract, so every concrete {@code ByteString}
 * implementation supplies its own check.
 *
 * @param state either {@code 0} (if this is the initial decoding operation)
 *     or the value returned from a call to a partial decoding method for the
 *     previous bytes
 * @param offset offset of the first byte to check
 * @param length number of bytes to check
 *
 * @return {@code -1} if the partial byte sequence is definitely malformed,
 *     {@code 0} if it is well-formed (no additional input needed), or, if the
 *     byte sequence is "incomplete", i.e. apparently terminated in the middle
 *     of a character, an opaque integer "state" value containing enough
 *     information to decode the character when passed to a subsequent
 *     invocation of a partial decoding method.
 */
protected abstract int partialIsValidUtf8(int state, int offset, int length);
// =================================================================
// equals() and hashCode()
/**
 * Compares this {@code ByteString} with another object.
 *
 * <p>Redeclared as abstract so that every concrete subclass must supply its
 * own implementation instead of inheriting the identity comparison of
 * {@link Object#equals(Object)}.
 *
 * <p>NOTE(review): presumably equality is defined by byte content, in line
 * with {@link #hashCode()} being documented as depending only on the
 * sequence of bytes -- confirm against the subclasses.
 *
 * @param o the object to compare against
 * @return whether {@code o} is considered equal to this {@code ByteString}
 */
@Override
public abstract boolean equals(Object o);
/**
 * Returns a non-zero hash code depending only on the sequence of bytes
 * in this {@code ByteString}.
 *
 * <p>NOTE(review): the non-zero requirement presumably exists because
 * {@link #peekCachedHashCode()} documents {@code 0} as "not computed yet"
 * -- confirm against the implementations.
 *
 * @return hashCode value for this object
 */
@Override
public abstract int hashCode();
// =================================================================
// Input stream
/**
 * Creates an {@code InputStream} which can be used to read the bytes.
 *
 * <p>The {@link InputStream} returned by this method is guaranteed to be
 * completely non-blocking. The method {@link InputStream#available()}
 * returns the number of bytes remaining in the stream. The methods
 * {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)}
 * and {@link InputStream#skip(long)} will read/skip as many bytes as are
 * available.
 *
 * <p>The methods in the returned {@link InputStream} might not be
 * thread safe.
 *
 * @return an input stream that returns the bytes of this byte string.
 */
public abstract InputStream newInput();
/**
 * Creates a {@link CodedInputStream} which can be used to read the bytes.
 * Using this is often more efficient than creating a {@link CodedInputStream}
 * that wraps the result of {@link #newInput()}.
 *
 * <p>The method is abstract, so each concrete {@code ByteString}
 * implementation decides how the stream is built.
 *
 * @return stream based on wrapped data
 */
public abstract CodedInputStream newCodedInput();
// =================================================================
// Output stream
/**
 * Builds an {@link Output} that starts out with the requested capacity.
 * Once all bytes have been written, call {@link Output#toByteString()} to
 * obtain the resulting {@code ByteString}.
 *
 * <p>An {@link ByteString.Output} behaves like a
 * {@link ByteArrayOutputStream}, with the difference that its result is a
 * {@link ByteString} instead of a {@code byte} array.
 *
 * @param initialCapacity estimate of number of bytes to be written
 * @return {@code OutputStream} for building a {@code ByteString}
 */
public static Output newOutput(int initialCapacity) {
  final Output sink = new Output(initialCapacity);
  return sink;
}
/**
 * Builds an {@link Output} whose initial capacity is
 * {@code CONCATENATE_BY_COPY_SIZE}. Once all bytes have been written, call
 * {@link Output#toByteString()} to obtain the resulting {@code ByteString}.
 *
 * <p>An {@link ByteString.Output} behaves like a
 * {@link ByteArrayOutputStream}, with the difference that its result is a
 * {@link ByteString} instead of a {@code byte} array.
 *
 * @return {@code OutputStream} for building a {@code ByteString}
 */
public static Output newOutput() {
  final Output sink = new Output(CONCATENATE_BY_COPY_SIZE);
  return sink;
}
/**
* Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
* create the {@code ByteString} instance.
*/
public static final class Output extends OutputStream {
// Implementation note.
// The public methods of this class must be synchronized. ByteStrings
// are guaranteed to be immutable. Without some sort of locking, it could
// be possible for one thread to call toByteString(), while another thread
// is still modifying the underlying byte array.
private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
// argument passed by user, indicating initial capacity.
private final int initialCapacity;
// ByteStrings to be concatenated to create the result
private final ArrayList This is package-private because it's a somewhat confusing interface.
* Users can call {@link Message#toByteString()} instead of calling this
* directly.
*
* @param size The target byte size of the {@code ByteString}. You must write
* exactly this many bytes before building the result.
* @return the builder
*/
static CodedBuilder newCodedBuilder(int size) {
  // The builder allocates its backing array of exactly `size` bytes up front.
  final CodedBuilder builder = new CodedBuilder(size);
  return builder;
}
/**
 * Builder that exposes a {@link CodedOutputStream} writing into a
 * fixed-size array and then wraps that array, uncopied, in a
 * {@code ByteString}. See {@link ByteString#newCodedBuilder(int)}.
 */
static final class CodedBuilder {
  /** Backing storage; handed to the resulting {@code ByteString} as-is. */
  private final byte[] buffer;

  /** Stream through which callers fill {@link #buffer}. */
  private final CodedOutputStream output;

  /**
   * Allocates the backing array and the stream that writes into it.
   *
   * @param size length in bytes of the backing array
   */
  private CodedBuilder(int size) {
    this.buffer = new byte[size];
    this.output = CodedOutputStream.newInstance(this.buffer);
  }

  /**
   * Gives access to the stream used to fill this builder.
   *
   * @return the stream writing into this builder's backing array
   */
  public CodedOutputStream getCodedOutput() {
    return this.output;
  }

  /**
   * Finishes the builder and wraps the backing array in a {@code ByteString}.
   *
   * @return a {@code ByteString} sharing this builder's backing array
   */
  public ByteString build() {
    this.output.checkNoSpaceLeft();

    // After the check above the stream has already written every byte of
    // the array, so it will not touch the array again and the array can be
    // wrapped directly instead of being copied.
    return new LiteralByteString(this.buffer);
  }
}
// =================================================================
// Methods {@link RopeByteString} needs on instances, which aren't part of the
// public API.
/**
 * Returns the depth of the tree representing this {@code ByteString}, if any,
 * whose root is this node. If this is a leaf node, returns 0.
 *
 * <p>Not part of the public API; provided for {@link RopeByteString}.
 *
 * @return tree depth or zero
 */
protected abstract int getTreeDepth();
/**
 * Returns {@code true} if this {@code ByteString} is literal (a leaf node) or
 * a flat-enough tree in the sense of {@link RopeByteString}.
 *
 * <p>Not part of the public API; provided for {@link RopeByteString}.
 *
 * @return true if the tree is flat enough
 */
protected abstract boolean isBalanced();
/**
 * Returns the cached hash code if available, without computing it.
 *
 * <p>The value {@code 0} is used as the "not computed yet" marker, which is
 * consistent with {@link #hashCode()} being documented as non-zero.
 *
 * @return value of cached hash code or 0 if not computed yet
 */
protected abstract int peekCachedHashCode();
/**
 * Computes the hash across the value bytes starting with the given hash, and
 * returns the result. This is used to compute the hash across strings
 * represented as a set of pieces by allowing the hash computation to be
 * continued from piece to piece: the value returned for one piece is passed
 * as {@code h} when hashing the next piece.
 *
 * @param h starting hash value
 * @param offset offset into this value to start looking at data values
 * @param length number of data values to include in the hash computation
 * @return ending hash value
 */
protected abstract int partialHash(int h, int offset, int length);
@Override
public String toString() {
return String.format("true
if the byte sequence represented by the
* argument is a prefix of the byte sequence represented by
* this string; false
otherwise.
*/
public boolean startsWith(ByteString prefix) {
  // A prefix longer than this string can never match.
  if (size() < prefix.size()) {
    return false;
  }
  // Compare the leading bytes of this string, cut to the prefix length.
  final ByteString head = substring(0, prefix.size());
  return head.equals(prefix);
}
/**
 * Checks whether the trailing bytes of this byte string are equal to the
 * given suffix. Analogous to {@link String#endsWith(String)}.
 *
 * @param suffix the suffix.
 * @return {@code true} if the byte sequence represented by the argument is
 *     a suffix of the byte sequence represented by this string;
 *     {@code false} otherwise.
 */
public boolean endsWith(ByteString suffix) {
  // A suffix longer than this string can never match.
  if (size() < suffix.size()) {
    return false;
  }
  // Compare the trailing bytes of this string, cut to the suffix length.
  final ByteString tail = substring(size() - suffix.size());
  return tail.equals(suffix);
}
// =================================================================
// byte[] -> ByteString
/**
 * Creates a {@code ByteString} holding a private copy of a slice of the
 * given array, so later changes to {@code bytes} do not affect the result.
 *
 * @param bytes source array
 * @param offset offset in source array
 * @param size number of bytes to copy
 * @return new {@code ByteString}
 */
public static ByteString copyFrom(byte[] bytes, int offset, int size) {
  final byte[] duplicate = new byte[size];
  System.arraycopy(bytes, offset, duplicate, 0, size);
  final ByteString result = new LiteralByteString(duplicate);
  return result;
}
/**
 * Creates a {@code ByteString} holding a copy of the whole array, by
 * delegating to {@link #copyFrom(byte[], int, int)} with offset {@code 0}
 * and the full array length.
 *
 * @param bytes to copy
 * @return new {@code ByteString}
 */
public static ByteString copyFrom(byte[] bytes) {
  final int length = bytes.length;
  return copyFrom(bytes, 0, length);
}
/**
 * Reads the next {@code size} bytes out of a {@code java.nio.ByteBuffer}
 * into a freshly allocated array and wraps that array in a
 * {@code ByteString}.
 *
 * @param bytes source buffer
 * @param size number of bytes to copy
 * @return new {@code ByteString}
 */
public static ByteString copyFrom(ByteBuffer bytes, int size) {
  final byte[] drained = new byte[size];
  // Bulk relative get: fills the whole destination array from the buffer.
  bytes.get(drained);
  final ByteString result = new LiteralByteString(drained);
  return result;
}
/**
 * Copies all remaining bytes of a {@code java.nio.ByteBuffer} into a
 * {@code ByteString}, by delegating to {@link #copyFrom(ByteBuffer, int)}
 * with the buffer's remaining byte count.
 *
 * @param bytes source buffer
 * @return new {@code ByteString}
 */
public static ByteString copyFrom(ByteBuffer bytes) {
  final int remaining = bytes.remaining();
  return copyFrom(bytes, remaining);
}
/**
 * Converts {@code text} to bytes with the charset of the given name and
 * wraps the encoded bytes in a {@code ByteString}.
 *
 * @param text source string
 * @param charsetName encoding to use
 * @return new {@code ByteString}
 * @throws UnsupportedEncodingException if the encoding isn't found
 */
public static ByteString copyFrom(String text, String charsetName)
    throws UnsupportedEncodingException {
  final byte[] encoded = text.getBytes(charsetName);
  return new LiteralByteString(encoded);
}
/**
 * Converts {@code text} to its UTF-8 byte representation and wraps the
 * encoded bytes in a {@code ByteString}.
 *
 * @param text source string
 * @return new {@code ByteString}
 */
public static ByteString copyFromUtf8(String text) {
  try {
    final byte[] utf8 = text.getBytes("UTF-8");
    return new LiteralByteString(utf8);
  } catch (UnsupportedEncodingException unsupported) {
    // The checked exception is rethrown unchecked, keeping the cause.
    throw new RuntimeException("UTF-8 not supported?", unsupported);
  }
}
// =================================================================
// InputStream -> ByteString
/**
 * Drains the given stream into a {@code ByteString}, blocking if necessary
 * until the end of the stream has been reached.
 *
 * <p><b>Performance notes:</b> The returned {@code ByteString} is an
 * immutable tree of byte arrays ("chunks") of the stream data. The
 * first chunk is small, with subsequent chunks each being double
 * the size, up to 8K. If the caller knows the precise length of
 * the stream and wishes to avoid all unnecessary copies and
 * allocations, consider using the two-argument version of this
 * method, below.
 *
 * <p>This overload delegates to the three-argument version, passing
 * {@code MIN_READ_FROM_CHUNK_SIZE} and {@code MAX_READ_FROM_CHUNK_SIZE} as
 * the chunk size range.
 *
 * @param streamToDrain The source stream, which is read completely
 *     but not closed.
 * @return A new {@code ByteString} which is made up of chunks of
 *     various sizes, depending on the behavior of the underlying
 *     stream.
 * @throws IOException IOException is thrown if there is a problem
 *     reading the underlying stream.
 */
public static ByteString readFrom(InputStream streamToDrain)
    throws IOException {
  final ByteString drained = readFrom(
      streamToDrain,
      MIN_READ_FROM_CHUNK_SIZE,
      MAX_READ_FROM_CHUNK_SIZE);
  return drained;
}
/**
 * Drains the given stream into a {@code ByteString}, blocking if necessary
 * until the end of the stream has been reached.
 *
 * <p><b>Performance notes:</b> The returned {@code ByteString} is an
 * immutable tree of byte arrays ("chunks") of the stream data. The
 * chunkSize parameter sets the size of these byte arrays. In
 * particular, if the chunkSize is precisely the same as the length
 * of the stream, unnecessary allocations and copies will be
 * avoided. Otherwise, the chunks will be of the given size, except
 * for the last chunk, which will be resized (via a reallocation and
 * copy) to contain the remainder of the stream.
 *
 * <p>This overload delegates to the three-argument version, passing
 * {@code chunkSize} as both the minimum and the maximum chunk size.
 *
 * @param streamToDrain The source stream, which is read completely
 *     but not closed.
 * @param chunkSize The size of the chunks in which to read the
 *     stream.
 * @return A new {@code ByteString} which is made up of chunks of
 *     the given size.
 * @throws IOException IOException is thrown if there is a problem
 *     reading the underlying stream.
 */
public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
    throws IOException {
  final ByteString drained = readFrom(streamToDrain, chunkSize, chunkSize);
  return drained;
}
// Helper method that takes the chunk size range as a parameter.
public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
int maxChunkSize) throws IOException {
Collection {@code
* Arrays.equals(byteString.toByteArray(),
* new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
* }
*
*