Merge changes from topic "jul15"

* changes:
  Offer explicit 3-byte vs 4-byte modified UTF-8.
  Cache least recently used output buffer.
This commit is contained in:
Jeff Sharkey 2022-07-19 18:59:31 +00:00 committed by Gerrit Code Review
commit 0e27faa039
12 changed files with 552 additions and 53 deletions

View File

@ -52,21 +52,45 @@ public class FastDataPerfTest {
while (state.keepRunning()) {
os.reset();
final BufferedOutputStream bos = new BufferedOutputStream(os, BUFFER_SIZE);
final DataOutput out = new DataOutputStream(bos);
doWrite(out);
bos.flush();
final DataOutputStream out = new DataOutputStream(bos);
try {
doWrite(out);
out.flush();
} finally {
out.close();
}
}
}
@Test
public void timeWrite_Local() throws IOException {
public void timeWrite_LocalUsing4ByteSequences() throws IOException {
final ByteArrayOutputStream os = new ByteArrayOutputStream(OUTPUT_SIZE);
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
while (state.keepRunning()) {
os.reset();
final FastDataOutput out = new FastDataOutput(os, BUFFER_SIZE);
doWrite(out);
out.flush();
final FastDataOutput out = FastDataOutput.obtainUsing4ByteSequences(os);
try {
doWrite(out);
out.flush();
} finally {
out.release();
}
}
}
/**
* Benchmarks repeated writes through a {@link FastDataOutput} obtained in
* 3-byte modified UTF-8 mode, targeting a single reused in-memory stream.
*/
@Test
public void timeWrite_LocalUsing3ByteSequences() throws IOException {
final ByteArrayOutputStream os = new ByteArrayOutputStream(OUTPUT_SIZE);
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
while (state.keepRunning()) {
// Rewind the shared sink so every iteration starts from an empty stream.
os.reset();
final FastDataOutput out = FastDataOutput.obtainUsing3ByteSequences(os);
try {
doWrite(out);
// Flush before the release() below so no buffered data is left behind.
out.flush();
} finally {
out.release();
}
}
}
@ -77,19 +101,42 @@ public class FastDataPerfTest {
while (state.keepRunning()) {
is.reset();
final BufferedInputStream bis = new BufferedInputStream(is, BUFFER_SIZE);
final DataInput in = new DataInputStream(bis);
doRead(in);
final DataInputStream in = new DataInputStream(bis);
try {
doRead(in);
} finally {
in.close();
}
}
}
@Test
public void timeRead_Local() throws Exception {
public void timeRead_LocalUsing4ByteSequences() throws Exception {
final ByteArrayInputStream is = new ByteArrayInputStream(doWrite());
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
while (state.keepRunning()) {
is.reset();
final DataInput in = new FastDataInput(is, BUFFER_SIZE);
doRead(in);
final FastDataInput in = FastDataInput.obtainUsing4ByteSequences(is);
try {
doRead(in);
} finally {
in.release();
}
}
}
/**
* Benchmarks repeated reads through a {@link FastDataInput} obtained in
* 3-byte modified UTF-8 mode, re-reading the same in-memory payload.
*/
@Test
public void timeRead_LocalUsing3ByteSequences() throws Exception {
// Payload is produced once up front by the no-arg doWrite() helper.
final ByteArrayInputStream is = new ByteArrayInputStream(doWrite());
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
while (state.keepRunning()) {
// Rewind the shared source so every iteration reads from the start.
is.reset();
final FastDataInput in = FastDataInput.obtainUsing3ByteSequences(is);
try {
doRead(in);
} finally {
in.release();
}
}
}

View File

@ -26,6 +26,12 @@ import dalvik.annotation.optimization.FastNative;
* <p>
* These methods purposefully accept only non-movable byte array addresses to
* avoid extra JNI overhead.
* <p>
* Callers are cautioned that there is a long-standing ART bug that emits
* non-standard 4-byte sequences, as described by {@code kUtfUse4ByteSequence}
* in {@code art/runtime/jni/jni_internal.cc}. If precise modified UTF-8
* encoding is required, use {@link com.android.internal.util.ModifiedUtf8}
* instead.
*
* @hide
*/
@ -33,6 +39,12 @@ public class CharsetUtils {
/**
* Attempt to encode the given string as modified UTF-8 into the destination
* byte array without making any new allocations.
* <p>
* Callers are cautioned that there is a long-standing ART bug that emits
* non-standard 4-byte sequences, as described by
* {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
* If precise modified UTF-8 encoding is required, use
* {@link com.android.internal.util.ModifiedUtf8} instead.
*
* @param src string value to be encoded
* @param dest destination byte array to encode into
@ -50,6 +62,12 @@ public class CharsetUtils {
/**
* Attempt to encode the given string as modified UTF-8 into the destination
* byte array without making any new allocations.
* <p>
* Callers are cautioned that there is a long-standing ART bug that emits
* non-standard 4-byte sequences, as described by
* {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
* If precise modified UTF-8 encoding is required, use
* {@link com.android.internal.util.ModifiedUtf8} instead.
*
* @param src string value to be encoded
* @param srcLen exact length of string to be encoded
@ -66,6 +84,12 @@ public class CharsetUtils {
/**
* Attempt to decode a modified UTF-8 string from the source byte array.
* <p>
* Callers are cautioned that there is a long-standing ART bug that emits
* non-standard 4-byte sequences, as described by
* {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
* If precise modified UTF-8 encoding is required, use
* {@link com.android.internal.util.ModifiedUtf8} instead.
*
* @param src source byte array to decode from
* @param srcOff offset into source where decoding should begin

View File

@ -0,0 +1,28 @@
{
"presubmit": [
{
"name": "FrameworksCoreTests",
"options": [
{
"include-filter": "android.util.CharsetUtilsTest"
},
{
"include-filter": "com.android.internal.util.FastDataTest"
}
],
"file_patterns": ["CharsetUtils|FastData"]
},
{
"name": "FrameworksCoreTests",
"options": [
{
"include-filter": "android.util.XmlTest"
},
{
"include-filter": "android.util.BinaryXmlTest"
}
],
"file_patterns": ["Xml"]
}
]
}

View File

@ -73,12 +73,6 @@ import java.util.Objects;
* </ul>
*/
public final class BinaryXmlPullParser implements TypedXmlPullParser {
/**
* Default buffer size, which matches {@code FastXmlSerializer}. This should
* be kept in sync with {@link BinaryXmlPullParser}.
*/
private static final int BUFFER_SIZE = 32_768;
private FastDataInput mIn;
private int mCurrentToken = START_DOCUMENT;
@ -100,7 +94,12 @@ public final class BinaryXmlPullParser implements TypedXmlPullParser {
throw new UnsupportedOperationException();
}
mIn = new FastDataInput(is, BUFFER_SIZE);
if (mIn != null) {
mIn.release();
mIn = null;
}
mIn = FastDataInput.obtainUsing4ByteSequences(is);
mCurrentToken = START_DOCUMENT;
mCurrentDepth = 0;

View File

@ -91,12 +91,6 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
static final int TYPE_BOOLEAN_TRUE = 12 << 4;
static final int TYPE_BOOLEAN_FALSE = 13 << 4;
/**
* Default buffer size, which matches {@code FastXmlSerializer}. This should
* be kept in sync with {@link BinaryXmlPullParser}.
*/
private static final int BUFFER_SIZE = 32_768;
private FastDataOutput mOut;
/**
@ -124,7 +118,7 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
throw new UnsupportedOperationException();
}
mOut = new FastDataOutput(os, BUFFER_SIZE);
mOut = FastDataOutput.obtainUsing4ByteSequences(os);
mOut.write(PROTOCOL_MAGIC_VERSION_0);
mTagCount = 0;
@ -138,7 +132,9 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
@Override
public void flush() throws IOException {
mOut.flush();
if (mOut != null) {
mOut.flush();
}
}
@Override
@ -157,6 +153,9 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
public void endDocument() throws IOException {
// Emit the end-of-document token, then push everything buffered downstream.
mOut.writeByte(END_DOCUMENT | TYPE_NULL);
flush();
// Release the output only after the flush above, then drop our reference
// so this serializer no longer touches an instance it has given back.
mOut.release();
mOut = null;
}
@Override

View File

@ -30,6 +30,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicReference;
/**
* Optimized implementation of {@link DataInput} which buffers data in memory
@ -41,13 +42,18 @@ import java.util.Objects;
public class FastDataInput implements DataInput, Closeable {
private static final int MAX_UNSIGNED_SHORT = 65_535;
private static final int DEFAULT_BUFFER_SIZE = 32_768;
private static AtomicReference<FastDataInput> sInCache = new AtomicReference<>();
private final VMRuntime mRuntime;
private final InputStream mIn;
private final byte[] mBuffer;
private final long mBufferPtr;
private final int mBufferCap;
private final boolean mUse4ByteSequence;
private InputStream mIn;
private int mBufferPos;
private int mBufferLim;
@ -57,7 +63,18 @@ public class FastDataInput implements DataInput, Closeable {
private int mStringRefCount = 0;
private String[] mStringRefs = new String[32];
/**
* @deprecated callers must specify {@code use4ByteSequence} so they make a
* clear choice about working around a long-standing ART bug, as
* described by the {@code kUtfUse4ByteSequence} comments in
* {@code art/runtime/jni/jni_internal.cc}.
*/
@Deprecated
public FastDataInput(@NonNull InputStream in, int bufferSize) {
this(in, bufferSize, true /* use4ByteSequence */);
}
public FastDataInput(@NonNull InputStream in, int bufferSize, boolean use4ByteSequence) {
mRuntime = VMRuntime.getRuntime();
mIn = Objects.requireNonNull(in);
if (bufferSize < 8) {
@ -67,6 +84,64 @@ public class FastDataInput implements DataInput, Closeable {
mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
mBufferPtr = mRuntime.addressOf(mBuffer);
mBufferCap = mBuffer.length;
mUse4ByteSequence = use4ByteSequence;
}
/**
* Obtain a {@link FastDataInput} configured with the given
* {@link InputStream} and which decodes large code-points using 3-byte
* sequences.
* <p>
* This <em>is</em> compatible with the {@link DataInput} API contract,
* which specifies that large code-points must be encoded with 3-byte
* sequences.
* <p>
* This factory always allocates a new instance with the default buffer size.
*/
public static FastDataInput obtainUsing3ByteSequences(@NonNull InputStream in) {
return new FastDataInput(in, DEFAULT_BUFFER_SIZE, false /* use4ByteSequence */);
}
/**
 * Hand out a {@link FastDataInput} bound to the given {@link InputStream}
 * which decodes large code-points using 4-byte sequences, reusing the
 * cached instance when one is available and allocating otherwise.
 * <p>
 * This <em>is not</em> compatible with the {@link DataInput} API contract,
 * which specifies that large code-points must be encoded with 3-byte
 * sequences.
 */
public static FastDataInput obtainUsing4ByteSequences(@NonNull InputStream in) {
    // Atomically take whatever is cached so no other caller can claim it too.
    final FastDataInput cached = sInCache.getAndSet(null);
    if (cached == null) {
        return new FastDataInput(in, DEFAULT_BUFFER_SIZE, true /* use4ByteSequence */);
    }
    cached.setInput(in);
    return cached;
}
/**
 * Give this {@link FastDataInput} back so it may be recycled. The stream
 * reference and all read state are dropped; the object must not be used
 * again by the caller once this returns.
 */
public void release() {
    mStringRefCount = 0;
    mBufferLim = 0;
    mBufferPos = 0;
    mIn = null;

    // Only default-sized, 4-byte-mode instances are eligible for the cache.
    final boolean cacheable = mUse4ByteSequence && mBufferCap == DEFAULT_BUFFER_SIZE;
    if (!cacheable) {
        return;
    }
    // Best effort: if the single cache slot is already taken, simply drop out.
    sInCache.compareAndSet(null, this);
}
/**
* Re-initializes the object for the new input.
* <p>
* Rejects a {@code null} stream, then discards any buffered bytes and
* resets the interned string reference count.
*
* @param in stream that subsequent reads will pull from; must not be null
*/
private void setInput(@NonNull InputStream in) {
// Validate first so a null argument fails before any state is reset.
mIn = Objects.requireNonNull(in);
mBufferPos = 0;
mBufferLim = 0;
mStringRefCount = 0;
}
private void fill(int need) throws IOException {
@ -90,6 +165,7 @@ public class FastDataInput implements DataInput, Closeable {
/**
 * Closes the underlying stream and then releases this instance so it may be
 * recycled.
 * <p>
 * Calling this again after the instance has been closed or released has no
 * effect, as required by {@link Closeable#close()}. The instance is released
 * even if closing the underlying stream fails.
 *
 * @throws IOException if the underlying stream fails to close
 */
@Override
public void close() throws IOException {
    final InputStream in = mIn;
    if (in == null) {
        // Already closed or released; nothing left to do.
        return;
    }
    try {
        in.close();
    } finally {
        // Always drop our state, even when the underlying close throws.
        release();
    }
}
@Override
@ -126,6 +202,14 @@ public class FastDataInput implements DataInput, Closeable {
/**
 * Reads a length-prefixed string, dispatching to the decoder that matches
 * the sequence mode this instance was created with.
 */
@Override
public String readUTF() throws IOException {
    return mUse4ByteSequence
            ? readUTFUsing4ByteSequences()
            : readUTFUsing3ByteSequences();
}
private String readUTFUsing4ByteSequences() throws IOException {
// Attempt to read directly from buffer space if there's enough room,
// otherwise fall back to chunking into place
final int len = readUnsignedShort();
@ -141,6 +225,22 @@ public class FastDataInput implements DataInput, Closeable {
}
}
/**
 * Reads an unsigned-short length prefix followed by that many bytes and
 * decodes them with {@link ModifiedUtf8}.
 */
private String readUTFUsing3ByteSequences() throws IOException {
    final int utfLen = readUnsignedShort();

    // Payload cannot fit inside our buffer: pull it into a temporary array.
    if (utfLen >= mBufferCap) {
        final byte[] scratch = (byte[]) mRuntime.newNonMovableArray(byte.class, utfLen + 1);
        readFully(scratch, 0, utfLen);
        return ModifiedUtf8.decode(scratch, new char[utfLen], 0, utfLen);
    }

    // Otherwise decode straight out of the buffer, topping it up if needed.
    final int buffered = mBufferLim - mBufferPos;
    if (buffered < utfLen) {
        fill(utfLen);
    }
    final String decoded = ModifiedUtf8.decode(mBuffer, new char[utfLen], mBufferPos, utfLen);
    mBufferPos += utfLen;
    return decoded;
}
/**
* Read a {@link String} value with the additional signal that the given
* value is a candidate for being canonicalized, similar to

View File

@ -30,6 +30,7 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicReference;
/**
* Optimized implementation of {@link DataOutput} which buffers data in memory
@ -41,23 +42,38 @@ import java.util.Objects;
public class FastDataOutput implements DataOutput, Flushable, Closeable {
private static final int MAX_UNSIGNED_SHORT = 65_535;
private static final int DEFAULT_BUFFER_SIZE = 32_768;
private static AtomicReference<FastDataOutput> sOutCache = new AtomicReference<>();
private final VMRuntime mRuntime;
private final OutputStream mOut;
private final byte[] mBuffer;
private final long mBufferPtr;
private final int mBufferCap;
private final boolean mUse4ByteSequence;
private OutputStream mOut;
private int mBufferPos;
/**
* Values that have been "interned" by {@link #writeInternedUTF(String)}.
*/
private HashMap<String, Short> mStringRefs = new HashMap<>();
private final HashMap<String, Short> mStringRefs = new HashMap<>();
/**
* @deprecated callers must specify {@code use4ByteSequence} so they make a
* clear choice about working around a long-standing ART bug, as
* described by the {@code kUtfUse4ByteSequence} comments in
* {@code art/runtime/jni/jni_internal.cc}.
*/
@Deprecated
public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
this(out, bufferSize, true /* use4ByteSequence */);
}
public FastDataOutput(@NonNull OutputStream out, int bufferSize, boolean use4ByteSequence) {
mRuntime = VMRuntime.getRuntime();
mOut = Objects.requireNonNull(out);
if (bufferSize < 8) {
throw new IllegalArgumentException();
}
@ -65,6 +81,68 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
mBufferPtr = mRuntime.addressOf(mBuffer);
mBufferCap = mBuffer.length;
mUse4ByteSequence = use4ByteSequence;
setOutput(out);
}
/**
* Obtain a {@link FastDataOutput} configured with the given
* {@link OutputStream} and which encodes large code-points using 3-byte
* sequences.
* <p>
* This <em>is</em> compatible with the {@link DataOutput} API contract,
* which specifies that large code-points must be encoded with 3-byte
* sequences.
* <p>
* This factory always allocates a new instance with the default buffer size.
*/
public static FastDataOutput obtainUsing3ByteSequences(@NonNull OutputStream out) {
return new FastDataOutput(out, DEFAULT_BUFFER_SIZE, false /* use4ByteSequence */);
}
/**
 * Hand out a {@link FastDataOutput} bound to the given {@link OutputStream}
 * which encodes large code-points using 4-byte sequences, reusing the
 * cached instance when one is available and allocating otherwise.
 * <p>
 * This <em>is not</em> compatible with the {@link DataOutput} API contract,
 * which specifies that large code-points must be encoded with 3-byte
 * sequences.
 */
public static FastDataOutput obtainUsing4ByteSequences(@NonNull OutputStream out) {
    // Atomically take whatever is cached so no other caller can claim it too.
    final FastDataOutput cached = sOutCache.getAndSet(null);
    if (cached == null) {
        return new FastDataOutput(out, DEFAULT_BUFFER_SIZE, true /* use4ByteSequence */);
    }
    cached.setOutput(out);
    return cached;
}
/**
 * Give this {@link FastDataOutput} back so it may be recycled. The object
 * must not be used again by the caller once this returns.
 *
 * @throws IllegalStateException if buffered data has not been flushed yet
 */
public void release() {
    // Refuse to silently discard bytes the caller never flushed.
    if (mBufferPos > 0) {
        throw new IllegalStateException("Lingering data, call flush() before releasing.");
    }

    mStringRefs.clear();
    mBufferPos = 0;
    mOut = null;

    // Only default-sized, 4-byte-mode instances are eligible for the cache;
    // if the single cache slot is already taken this instance is dropped.
    if (mUse4ByteSequence && mBufferCap == DEFAULT_BUFFER_SIZE) {
        sOutCache.compareAndSet(null, this);
    }
}
/**
* Re-initializes the object for the new output.
* <p>
* Rejects a {@code null} stream, then rewinds the buffer position and
* forgets all previously interned strings.
*
* @param out stream that subsequent writes will drain into; must not be null
*/
private void setOutput(@NonNull OutputStream out) {
// Validate first so a null argument fails before any state is reset.
mOut = Objects.requireNonNull(out);
mBufferPos = 0;
mStringRefs.clear();
}
private void drain() throws IOException {
@ -83,6 +161,7 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
/**
 * Closes the underlying stream and then releases this instance so it may be
 * recycled.
 * <p>
 * Calling this again after the instance has been closed or released has no
 * effect, as required by {@link Closeable#close()}. Callers must still
 * {@link #flush()} beforehand: buffered data is not written by this method
 * and {@link #release()} rejects instances that still hold unflushed bytes.
 *
 * @throws IOException if the underlying stream fails to close
 * @throws IllegalStateException if unflushed data is still buffered
 */
@Override
public void close() throws IOException {
    final OutputStream out = mOut;
    if (out == null) {
        // Already closed or released; nothing left to do.
        return;
    }
    out.close();
    // Not wrapped in finally: release() can itself throw on lingering data,
    // which would mask the more useful IOException from the close above.
    release();
}
@Override
@ -109,6 +188,14 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
/**
 * Writes a length-prefixed string, dispatching to the encoder that matches
 * the sequence mode this instance was created with.
 */
@Override
public void writeUTF(String s) throws IOException {
    if (!mUse4ByteSequence) {
        writeUTFUsing3ByteSequences(s);
        return;
    }
    writeUTFUsing4ByteSequences(s);
}
private void writeUTFUsing4ByteSequences(String s) throws IOException {
// Attempt to write directly to buffer space if there's enough room,
// otherwise fall back to chunking into place
if (mBufferCap - mBufferPos < 2 + s.length()) drain();
@ -136,6 +223,27 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
}
}
/**
 * Writes the given string as an unsigned-short byte length followed by its
 * {@link ModifiedUtf8} encoding.
 *
 * @param s value to write
 * @throws IOException if the encoded form is longer than
 *             {@code MAX_UNSIGNED_SHORT} bytes, or if draining fails
 */
private void writeUTFUsing3ByteSequences(String s) throws IOException {
    // Range-check in long arithmetic before narrowing: a huge string can
    // encode to more than Integer.MAX_VALUE bytes, and a premature cast to
    // int could wrap into the accepted range and corrupt the stream.
    final long encodedLen = ModifiedUtf8.countBytes(s, false);
    if (encodedLen > MAX_UNSIGNED_SHORT) {
        throw new IOException("Modified UTF-8 length too large: " + encodedLen);
    }
    final int len = (int) encodedLen;

    // Attempt to write directly to buffer space if there's enough room,
    // otherwise fall back to chunking into place
    if (mBufferCap >= 2 + len) {
        // Make room for the length prefix and payload as one contiguous run.
        if (mBufferCap - mBufferPos < 2 + len) drain();
        writeShort(len);
        ModifiedUtf8.encode(mBuffer, mBufferPos, s);
        mBufferPos += len;
    } else {
        // Payload can never fit in our buffer: encode aside, then copy through.
        final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
        ModifiedUtf8.encode(tmp, 0, s);
        writeShort(len);
        write(tmp, 0, len);
    }
}
/**
* Write a {@link String} value with the additional signal that the given
* value is a candidate for being canonicalized, similar to

View File

@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.internal.util;
import java.io.UTFDataFormatException;
public class ModifiedUtf8 {
    /** Largest byte count whose length still fits an unsigned short prefix. */
    private static final int MAX_SHORT_LENGTH = 65535;

    /** Static utility holder; never instantiated. */
    private ModifiedUtf8() {
    }

    /**
     * Decodes {@code utfSize} bytes of <i>modified UTF-8</i> from {@code in},
     * starting at {@code offset}, using {@code out} as scratch space for the
     * decoded UTF-16 units.
     *
     * <p>A raw zero byte is accepted and decoded to U+0000, even though a
     * conforming encoder never emits it; the reference implementation does
     * the same.
     *
     * @param in source bytes
     * @param out scratch array receiving the decoded characters
     * @param offset index in {@code in} of the first byte to decode
     * @param utfSize number of bytes to decode
     * @return string built from the decoded characters
     * @throws UTFDataFormatException if a sequence is truncated, has a bad
     *             continuation byte, or starts with an unsupported lead byte
     */
    public static String decode(byte[] in, char[] out, int offset, int utfSize)
            throws UTFDataFormatException {
        int consumed = 0;
        int produced = 0;
        while (consumed < utfSize) {
            // Park the sign-extended lead byte in the output slot; the masks
            // below only look at its low eight bits.
            out[produced] = (char) in[offset + consumed];
            consumed++;
            final int lead = out[produced];

            if (lead < 0x80) {
                // Single-byte sequence: the parked value is the final char.
                produced++;
                continue;
            }

            if ((lead & 0xe0) == 0xc0) {
                // Two-byte sequence.
                if (consumed >= utfSize) {
                    throw new UTFDataFormatException("bad second byte at " + consumed);
                }
                final int second = in[offset + consumed];
                consumed++;
                if ((second & 0xC0) != 0x80) {
                    throw new UTFDataFormatException("bad second byte at " + (consumed - 1));
                }
                out[produced++] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
            } else if ((lead & 0xf0) == 0xe0) {
                // Three-byte sequence.
                if (consumed + 1 >= utfSize) {
                    throw new UTFDataFormatException("bad third byte at " + (consumed + 1));
                }
                final int second = in[offset + consumed];
                final int third = in[offset + consumed + 1];
                consumed += 2;
                final boolean secondOk = (second & 0xC0) == 0x80;
                final boolean thirdOk = (third & 0xC0) == 0x80;
                if (!(secondOk && thirdOk)) {
                    throw new UTFDataFormatException(
                            "bad second or third byte at " + (consumed - 2));
                }
                out[produced++] = (char) (((lead & 0x0F) << 12)
                        | ((second & 0x3F) << 6)
                        | (third & 0x3F));
            } else {
                // Stray continuation byte or a lead byte for a longer sequence.
                throw new UTFDataFormatException("bad byte at " + (consumed - 1));
            }
        }
        return new String(out, 0, produced);
    }

    /**
     * Computes how many bytes the modified UTF-8 form of {@code s} occupies.
     * Only the character payload is counted; any length prefix is up to the
     * caller, since different formats use two, four, or eight bytes for it.
     *
     * @param s string to measure
     * @param shortLength when true, fail as soon as the running total no
     *            longer fits in an unsigned short
     * @return number of payload bytes
     * @throws UTFDataFormatException if {@code shortLength} is set and the
     *             total exceeds 65535 bytes
     */
    public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
        long total = 0;
        for (int i = 0, n = s.length(); i < n; i++) {
            total += encodedLength(s.charAt(i));
            if (shortLength && total > MAX_SHORT_LENGTH) {
                throw new UTFDataFormatException("String more than 65535 UTF bytes long");
            }
        }
        return total;
    }

    /**
     * Writes the <i>modified UTF-8</i> bytes for {@code s} into {@code dst},
     * beginning at index {@code offset}.
     *
     * @param dst destination array
     * @param offset index in {@code dst} of the first byte written
     * @param s string to encode
     */
    public static void encode(byte[] dst, int offset, String s) {
        int cursor = offset;
        for (int i = 0, n = s.length(); i < n; i++) {
            final char unit = s.charAt(i);
            switch (encodedLength(unit)) {
                case 1:
                    dst[cursor++] = (byte) unit;
                    break;
                case 2:
                    dst[cursor++] = (byte) (0xc0 | (0x1f & (unit >> 6)));
                    dst[cursor++] = (byte) (0x80 | (0x3f & unit));
                    break;
                default:
                    dst[cursor++] = (byte) (0xe0 | (0x0f & (unit >> 12)));
                    dst[cursor++] = (byte) (0x80 | (0x3f & (unit >> 6)));
                    dst[cursor++] = (byte) (0x80 | (0x3f & unit));
                    break;
            }
        }
    }

    /** Number of bytes one UTF-16 unit needs; U+0000 deliberately takes two. */
    private static int encodedLength(char unit) {
        if (unit == 0 || unit > 127) {
            return unit <= 2047 ? 2 : 3;
        }
        return 1;
    }
}

View File

@ -1,7 +1,20 @@
{
"presubmit": [
{
"name": "ScreenshotHelperTests"
"name": "ScreenshotHelperTests",
"file_patterns": ["ScreenshotHelper"]
},
{
"name": "FrameworksCoreTests",
"options": [
{
"include-filter": "android.util.XmlTest"
},
{
"include-filter": "android.util.BinaryXmlTest"
}
],
"file_patterns": ["Xml"]
}
]
}
}

16
core/jni/TEST_MAPPING Normal file
View File

@ -0,0 +1,16 @@
{
"presubmit": [
{
"name": "FrameworksCoreTests",
"options": [
{
"include-filter": "android.util.CharsetUtilsTest"
},
{
"include-filter": "com.android.internal.util.FastDataTest"
}
],
"file_patterns": ["CharsetUtils|FastData"]
}
]
}

View File

@ -224,7 +224,7 @@ public class XmlTest {
doVerifyRead(in);
}
private static final String TEST_STRING = "com.example";
private static final String TEST_STRING = "com☃example😀typical☃package😀name";
private static final String TEST_STRING_EMPTY = "";
private static final byte[] TEST_BYTES = new byte[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
private static final byte[] TEST_BYTES_EMPTY = new byte[0];

View File

@ -23,10 +23,13 @@ import static org.junit.Assert.fail;
import android.annotation.NonNull;
import android.util.ExceptionUtils;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import libcore.util.HexEncoding;
import org.junit.Assume;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@ -38,22 +41,34 @@ import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.function.Consumer;
@RunWith(AndroidJUnit4.class)
@RunWith(Parameterized.class)
public class FastDataTest {
private final boolean use4ByteSequence;
private static final String TEST_SHORT_STRING = "a";
private static final String TEST_LONG_STRING = "com☃example☃typical☃package☃name";
private static final String TEST_LONG_STRING = "com☃example😀typical☃package😀name";
private static final byte[] TEST_BYTES = TEST_LONG_STRING.getBytes(StandardCharsets.UTF_16LE);
@Parameters(name = "use4ByteSequence={0}")
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][] { {true}, {false} });
}
public FastDataTest(boolean use4ByteSequence) {
this.use4ByteSequence = use4ByteSequence;
}
@Test
public void testEndOfFile_Int() throws Exception {
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
new byte[] { 1 }), 1000)) {
new byte[] { 1 }), 1000, use4ByteSequence)) {
assertThrows(EOFException.class, () -> in.readInt());
}
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
new byte[] { 1, 1, 1, 1 }), 1000)) {
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
assertEquals(1, in.readByte());
assertThrows(EOFException.class, () -> in.readInt());
}
@ -62,11 +77,11 @@ public class FastDataTest {
@Test
public void testEndOfFile_String() throws Exception {
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
new byte[] { 1 }), 1000)) {
new byte[] { 1 }), 1000, use4ByteSequence)) {
assertThrows(EOFException.class, () -> in.readUTF());
}
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
new byte[] { 1, 1, 1, 1 }), 1000)) {
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
assertThrows(EOFException.class, () -> in.readUTF());
}
}
@ -74,12 +89,12 @@ public class FastDataTest {
@Test
public void testEndOfFile_Bytes_Small() throws Exception {
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
new byte[] { 1, 1, 1, 1 }), 1000)) {
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
final byte[] tmp = new byte[10];
assertThrows(EOFException.class, () -> in.readFully(tmp));
}
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
new byte[] { 1, 1, 1, 1 }), 1000)) {
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
final byte[] tmp = new byte[10_000];
assertThrows(EOFException.class, () -> in.readFully(tmp));
}
@ -88,7 +103,8 @@ public class FastDataTest {
@Test
public void testUTF_Bounds() throws Exception {
final char[] buf = new char[65_534];
try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(), BOUNCE_SIZE)) {
try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(),
BOUNCE_SIZE, use4ByteSequence)) {
// Writing simple string will fit fine
Arrays.fill(buf, '!');
final String simple = new String(buf);
@ -100,11 +116,15 @@ public class FastDataTest {
final String complex = new String(buf);
assertThrows(IOException.class, () -> out.writeUTF(complex));
assertThrows(IOException.class, () -> out.writeInternedUTF(complex));
out.flush();
}
}
@Test
public void testTranscode() throws Exception {
Assume.assumeFalse(use4ByteSequence);
// Verify that upstream data can be read by fast
{
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
@ -113,20 +133,22 @@ public class FastDataTest {
out.flush();
final FastDataInput in = new FastDataInput(
new ByteArrayInputStream(outStream.toByteArray()), BOUNCE_SIZE);
doTransodeRead(in);
new ByteArrayInputStream(outStream.toByteArray()),
BOUNCE_SIZE, use4ByteSequence);
doTranscodeRead(in);
}
// Verify that fast data can be read by upstream
{
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
final FastDataOutput out = new FastDataOutput(outStream, BOUNCE_SIZE);
final FastDataOutput out = new FastDataOutput(outStream,
BOUNCE_SIZE, use4ByteSequence);
doTranscodeWrite(out);
out.flush();
final DataInputStream in = new DataInputStream(
new ByteArrayInputStream(outStream.toByteArray()));
doTransodeRead(in);
doTranscodeRead(in);
}
}
@ -144,7 +166,7 @@ public class FastDataTest {
out.writeDouble(32d);
}
private static void doTransodeRead(DataInput in) throws IOException {
private static void doTranscodeRead(DataInput in) throws IOException {
assertEquals(true, in.readBoolean());
assertEquals(false, in.readBoolean());
assertEquals(1, in.readByte());
@ -225,10 +247,12 @@ public class FastDataTest {
doBounce((out) -> {
out.writeUTF("");
out.writeUTF("");
out.writeUTF("😀");
out.writeUTF("example");
}, (in) -> {
assertEquals("", in.readUTF());
assertEquals("", in.readUTF());
assertEquals("😀", in.readUTF());
assertEquals("example", in.readUTF());
});
}
@ -263,6 +287,35 @@ public class FastDataTest {
}, 1);
}
/**
 * Verify that we encode every valid code-point identically to RI when
 * running in 3-byte mode.
 * <p>
 * Each code-point is written on its own through both
 * {@link DataOutputStream} and a 3-byte {@link FastDataOutput}, and the
 * resulting bytes are compared as hex strings.
 */
@Test
public void testBounce_UTF_Exhaustive() throws Exception {
    // Only the 3-byte mode promises byte-for-byte parity with the RI.
    Assume.assumeFalse(use4ByteSequence);

    final ByteArrayOutputStream slowStream = new ByteArrayOutputStream();
    final DataOutput slowData = new DataOutputStream(slowStream);
    final ByteArrayOutputStream fastStream = new ByteArrayOutputStream();
    final FastDataOutput fastData = FastDataOutput.obtainUsing3ByteSequences(fastStream);

    // Inclusive upper bound: MAX_CODE_POINT (U+10FFFF) is itself valid and
    // must be covered for the check to be exhaustive.
    for (int cp = Character.MIN_CODE_POINT; cp <= Character.MAX_CODE_POINT; cp++) {
        if (Character.isValidCodePoint(cp)) {
            final String cpString = new String(Character.toChars(cp));

            slowStream.reset();
            slowData.writeUTF(cpString);

            fastStream.reset();
            fastData.writeUTF(cpString);
            // Push buffered bytes into fastStream before comparing.
            fastData.flush();

            assertEquals("Bad encoding for code-point " + Integer.toHexString(cp),
                    HexEncoding.encodeToString(slowStream.toByteArray()),
                    HexEncoding.encodeToString(fastStream.toByteArray()));
        }
    }
}
@Test
public void testBounce_InternedUTF() throws Exception {
doBounce((out) -> {
@ -355,22 +408,24 @@ public class FastDataTest {
* Verify that some common data can be written and read back, effectively
* "bouncing" it through a serialized representation.
*/
private static void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
private void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
@NonNull ThrowingConsumer<FastDataInput> in) throws Exception {
doBounce(out, in, BOUNCE_REPEAT);
}
private static void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
private void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
@NonNull ThrowingConsumer<FastDataInput> in, int count) throws Exception {
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
final FastDataOutput outData = new FastDataOutput(outStream, BOUNCE_SIZE);
final FastDataOutput outData = new FastDataOutput(outStream,
BOUNCE_SIZE, use4ByteSequence);
for (int i = 0; i < count; i++) {
out.accept(outData);
}
outData.flush();
final ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray());
final FastDataInput inData = new FastDataInput(inStream, BOUNCE_SIZE);
final FastDataInput inData = new FastDataInput(inStream,
BOUNCE_SIZE, use4ByteSequence);
for (int i = 0; i < count; i++) {
in.accept(inData);
}