Merge changes from topic "jul15"
* changes: Offer explicit 3-byte vs 4-byte modified UTF-8. Cache least recently used output buffer.
This commit is contained in:
commit
0e27faa039
@ -52,21 +52,45 @@ public class FastDataPerfTest {
|
||||
while (state.keepRunning()) {
|
||||
os.reset();
|
||||
final BufferedOutputStream bos = new BufferedOutputStream(os, BUFFER_SIZE);
|
||||
final DataOutput out = new DataOutputStream(bos);
|
||||
doWrite(out);
|
||||
bos.flush();
|
||||
final DataOutputStream out = new DataOutputStream(bos);
|
||||
try {
|
||||
doWrite(out);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timeWrite_Local() throws IOException {
|
||||
public void timeWrite_LocalUsing4ByteSequences() throws IOException {
|
||||
final ByteArrayOutputStream os = new ByteArrayOutputStream(OUTPUT_SIZE);
|
||||
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
|
||||
while (state.keepRunning()) {
|
||||
os.reset();
|
||||
final FastDataOutput out = new FastDataOutput(os, BUFFER_SIZE);
|
||||
doWrite(out);
|
||||
out.flush();
|
||||
final FastDataOutput out = FastDataOutput.obtainUsing4ByteSequences(os);
|
||||
try {
|
||||
doWrite(out);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timeWrite_LocalUsing3ByteSequences() throws IOException {
|
||||
final ByteArrayOutputStream os = new ByteArrayOutputStream(OUTPUT_SIZE);
|
||||
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
|
||||
while (state.keepRunning()) {
|
||||
os.reset();
|
||||
final FastDataOutput out = FastDataOutput.obtainUsing3ByteSequences(os);
|
||||
try {
|
||||
doWrite(out);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,19 +101,42 @@ public class FastDataPerfTest {
|
||||
while (state.keepRunning()) {
|
||||
is.reset();
|
||||
final BufferedInputStream bis = new BufferedInputStream(is, BUFFER_SIZE);
|
||||
final DataInput in = new DataInputStream(bis);
|
||||
doRead(in);
|
||||
final DataInputStream in = new DataInputStream(bis);
|
||||
try {
|
||||
doRead(in);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timeRead_Local() throws Exception {
|
||||
public void timeRead_LocalUsing4ByteSequences() throws Exception {
|
||||
final ByteArrayInputStream is = new ByteArrayInputStream(doWrite());
|
||||
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
|
||||
while (state.keepRunning()) {
|
||||
is.reset();
|
||||
final DataInput in = new FastDataInput(is, BUFFER_SIZE);
|
||||
doRead(in);
|
||||
final FastDataInput in = FastDataInput.obtainUsing4ByteSequences(is);
|
||||
try {
|
||||
doRead(in);
|
||||
} finally {
|
||||
in.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timeRead_LocalUsing3ByteSequences() throws Exception {
|
||||
final ByteArrayInputStream is = new ByteArrayInputStream(doWrite());
|
||||
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
|
||||
while (state.keepRunning()) {
|
||||
is.reset();
|
||||
final FastDataInput in = FastDataInput.obtainUsing3ByteSequences(is);
|
||||
try {
|
||||
doRead(in);
|
||||
} finally {
|
||||
in.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,12 @@ import dalvik.annotation.optimization.FastNative;
|
||||
* <p>
|
||||
* These methods purposefully accept only non-movable byte array addresses to
|
||||
* avoid extra JNI overhead.
|
||||
* <p>
|
||||
* Callers are cautioned that there is a long-standing ART bug that emits
|
||||
* non-standard 4-byte sequences, as described by {@code kUtfUse4ByteSequence}
|
||||
* in {@code art/runtime/jni/jni_internal.cc}. If precise modified UTF-8
|
||||
* encoding is required, use {@link com.android.internal.util.ModifiedUtf8}
|
||||
* instead.
|
||||
*
|
||||
* @hide
|
||||
*/
|
||||
@ -33,6 +39,12 @@ public class CharsetUtils {
|
||||
/**
|
||||
* Attempt to encode the given string as modified UTF-8 into the destination
|
||||
* byte array without making any new allocations.
|
||||
* <p>
|
||||
* Callers are cautioned that there is a long-standing ART bug that emits
|
||||
* non-standard 4-byte sequences, as described by
|
||||
* {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
|
||||
* If precise modified UTF-8 encoding is required, use
|
||||
* {@link com.android.internal.util.ModifiedUtf8} instead.
|
||||
*
|
||||
* @param src string value to be encoded
|
||||
* @param dest destination byte array to encode into
|
||||
@ -50,6 +62,12 @@ public class CharsetUtils {
|
||||
/**
|
||||
* Attempt to encode the given string as modified UTF-8 into the destination
|
||||
* byte array without making any new allocations.
|
||||
* <p>
|
||||
* Callers are cautioned that there is a long-standing ART bug that emits
|
||||
* non-standard 4-byte sequences, as described by
|
||||
* {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
|
||||
* If precise modified UTF-8 encoding is required, use
|
||||
* {@link com.android.internal.util.ModifiedUtf8} instead.
|
||||
*
|
||||
* @param src string value to be encoded
|
||||
* @param srcLen exact length of string to be encoded
|
||||
@ -66,6 +84,12 @@ public class CharsetUtils {
|
||||
|
||||
/**
|
||||
* Attempt to decode a modified UTF-8 string from the source byte array.
|
||||
* <p>
|
||||
* Callers are cautioned that there is a long-standing ART bug that emits
|
||||
* non-standard 4-byte sequences, as described by
|
||||
* {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
|
||||
* If precise modified UTF-8 encoding is required, use
|
||||
* {@link com.android.internal.util.ModifiedUtf8} instead.
|
||||
*
|
||||
* @param src source byte array to decode from
|
||||
* @param srcOff offset into source where decoding should begin
|
||||
|
28
core/java/android/util/TEST_MAPPING
Normal file
28
core/java/android/util/TEST_MAPPING
Normal file
@ -0,0 +1,28 @@
|
||||
{
|
||||
"presubmit": [
|
||||
{
|
||||
"name": "FrameworksCoreTests",
|
||||
"options": [
|
||||
{
|
||||
"include-filter": "android.util.CharsetUtilsTest"
|
||||
},
|
||||
{
|
||||
"include-filter": "com.android.internal.util.FastDataTest"
|
||||
}
|
||||
],
|
||||
"file_patterns": ["CharsetUtils|FastData"]
|
||||
},
|
||||
{
|
||||
"name": "FrameworksCoreTests",
|
||||
"options": [
|
||||
{
|
||||
"include-filter": "android.util.XmlTest"
|
||||
},
|
||||
{
|
||||
"include-filter": "android.util.BinaryXmlTest"
|
||||
}
|
||||
],
|
||||
"file_patterns": ["Xml"]
|
||||
}
|
||||
]
|
||||
}
|
@ -73,12 +73,6 @@ import java.util.Objects;
|
||||
* </ul>
|
||||
*/
|
||||
public final class BinaryXmlPullParser implements TypedXmlPullParser {
|
||||
/**
|
||||
* Default buffer size, which matches {@code FastXmlSerializer}. This should
|
||||
* be kept in sync with {@link BinaryXmlPullParser}.
|
||||
*/
|
||||
private static final int BUFFER_SIZE = 32_768;
|
||||
|
||||
private FastDataInput mIn;
|
||||
|
||||
private int mCurrentToken = START_DOCUMENT;
|
||||
@ -100,7 +94,12 @@ public final class BinaryXmlPullParser implements TypedXmlPullParser {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
mIn = new FastDataInput(is, BUFFER_SIZE);
|
||||
if (mIn != null) {
|
||||
mIn.release();
|
||||
mIn = null;
|
||||
}
|
||||
|
||||
mIn = FastDataInput.obtainUsing4ByteSequences(is);
|
||||
|
||||
mCurrentToken = START_DOCUMENT;
|
||||
mCurrentDepth = 0;
|
||||
|
@ -91,12 +91,6 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
|
||||
static final int TYPE_BOOLEAN_TRUE = 12 << 4;
|
||||
static final int TYPE_BOOLEAN_FALSE = 13 << 4;
|
||||
|
||||
/**
|
||||
* Default buffer size, which matches {@code FastXmlSerializer}. This should
|
||||
* be kept in sync with {@link BinaryXmlPullParser}.
|
||||
*/
|
||||
private static final int BUFFER_SIZE = 32_768;
|
||||
|
||||
private FastDataOutput mOut;
|
||||
|
||||
/**
|
||||
@ -124,7 +118,7 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
mOut = new FastDataOutput(os, BUFFER_SIZE);
|
||||
mOut = FastDataOutput.obtainUsing4ByteSequences(os);
|
||||
mOut.write(PROTOCOL_MAGIC_VERSION_0);
|
||||
|
||||
mTagCount = 0;
|
||||
@ -138,7 +132,9 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
|
||||
|
||||
@Override
|
||||
public void flush() throws IOException {
|
||||
mOut.flush();
|
||||
if (mOut != null) {
|
||||
mOut.flush();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -157,6 +153,9 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
|
||||
public void endDocument() throws IOException {
|
||||
mOut.writeByte(END_DOCUMENT | TYPE_NULL);
|
||||
flush();
|
||||
|
||||
mOut.release();
|
||||
mOut = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -30,6 +30,7 @@ import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* Optimized implementation of {@link DataInput} which buffers data in memory
|
||||
@ -41,13 +42,18 @@ import java.util.Objects;
|
||||
public class FastDataInput implements DataInput, Closeable {
|
||||
private static final int MAX_UNSIGNED_SHORT = 65_535;
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 32_768;
|
||||
|
||||
private static AtomicReference<FastDataInput> sInCache = new AtomicReference<>();
|
||||
|
||||
private final VMRuntime mRuntime;
|
||||
private final InputStream mIn;
|
||||
|
||||
private final byte[] mBuffer;
|
||||
private final long mBufferPtr;
|
||||
private final int mBufferCap;
|
||||
private final boolean mUse4ByteSequence;
|
||||
|
||||
private InputStream mIn;
|
||||
private int mBufferPos;
|
||||
private int mBufferLim;
|
||||
|
||||
@ -57,7 +63,18 @@ public class FastDataInput implements DataInput, Closeable {
|
||||
private int mStringRefCount = 0;
|
||||
private String[] mStringRefs = new String[32];
|
||||
|
||||
/**
|
||||
* @deprecated callers must specify {@code use4ByteSequence} so they make a
|
||||
* clear choice about working around a long-standing ART bug, as
|
||||
* described by the {@code kUtfUse4ByteSequence} comments in
|
||||
* {@code art/runtime/jni/jni_internal.cc}.
|
||||
*/
|
||||
@Deprecated
|
||||
public FastDataInput(@NonNull InputStream in, int bufferSize) {
|
||||
this(in, bufferSize, true /* use4ByteSequence */);
|
||||
}
|
||||
|
||||
public FastDataInput(@NonNull InputStream in, int bufferSize, boolean use4ByteSequence) {
|
||||
mRuntime = VMRuntime.getRuntime();
|
||||
mIn = Objects.requireNonNull(in);
|
||||
if (bufferSize < 8) {
|
||||
@ -67,6 +84,64 @@ public class FastDataInput implements DataInput, Closeable {
|
||||
mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
|
||||
mBufferPtr = mRuntime.addressOf(mBuffer);
|
||||
mBufferCap = mBuffer.length;
|
||||
mUse4ByteSequence = use4ByteSequence;
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a {@link FastDataInput} configured with the given
|
||||
* {@link InputStream} and which decodes large code-points using 3-byte
|
||||
* sequences.
|
||||
* <p>
|
||||
* This <em>is</em> compatible with the {@link DataInput} API contract,
|
||||
* which specifies that large code-points must be encoded with 3-byte
|
||||
* sequences.
|
||||
*/
|
||||
public static FastDataInput obtainUsing3ByteSequences(@NonNull InputStream in) {
|
||||
return new FastDataInput(in, DEFAULT_BUFFER_SIZE, false /* use4ByteSequence */);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a {@link FastDataInput} configured with the given
|
||||
* {@link InputStream} and which decodes large code-points using 4-byte
|
||||
* sequences.
|
||||
* <p>
|
||||
* This <em>is not</em> compatible with the {@link DataInput} API contract,
|
||||
* which specifies that large code-points must be encoded with 3-byte
|
||||
* sequences.
|
||||
*/
|
||||
public static FastDataInput obtainUsing4ByteSequences(@NonNull InputStream in) {
|
||||
FastDataInput instance = sInCache.getAndSet(null);
|
||||
if (instance != null) {
|
||||
instance.setInput(in);
|
||||
return instance;
|
||||
}
|
||||
return new FastDataInput(in, DEFAULT_BUFFER_SIZE, true /* use4ByteSequence */);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release a {@link FastDataInput} to potentially be recycled. You must not
|
||||
* interact with the object after releasing it.
|
||||
*/
|
||||
public void release() {
|
||||
mIn = null;
|
||||
mBufferPos = 0;
|
||||
mBufferLim = 0;
|
||||
mStringRefCount = 0;
|
||||
|
||||
if (mBufferCap == DEFAULT_BUFFER_SIZE && mUse4ByteSequence) {
|
||||
// Try to return to the cache.
|
||||
sInCache.compareAndSet(null, this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-initializes the object for the new input.
|
||||
*/
|
||||
private void setInput(@NonNull InputStream in) {
|
||||
mIn = Objects.requireNonNull(in);
|
||||
mBufferPos = 0;
|
||||
mBufferLim = 0;
|
||||
mStringRefCount = 0;
|
||||
}
|
||||
|
||||
private void fill(int need) throws IOException {
|
||||
@ -90,6 +165,7 @@ public class FastDataInput implements DataInput, Closeable {
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
mIn.close();
|
||||
release();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -126,6 +202,14 @@ public class FastDataInput implements DataInput, Closeable {
|
||||
|
||||
@Override
|
||||
public String readUTF() throws IOException {
|
||||
if (mUse4ByteSequence) {
|
||||
return readUTFUsing4ByteSequences();
|
||||
} else {
|
||||
return readUTFUsing3ByteSequences();
|
||||
}
|
||||
}
|
||||
|
||||
private String readUTFUsing4ByteSequences() throws IOException {
|
||||
// Attempt to read directly from buffer space if there's enough room,
|
||||
// otherwise fall back to chunking into place
|
||||
final int len = readUnsignedShort();
|
||||
@ -141,6 +225,22 @@ public class FastDataInput implements DataInput, Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
private String readUTFUsing3ByteSequences() throws IOException {
|
||||
// Attempt to read directly from buffer space if there's enough room,
|
||||
// otherwise fall back to chunking into place
|
||||
final int len = readUnsignedShort();
|
||||
if (mBufferCap > len) {
|
||||
if (mBufferLim - mBufferPos < len) fill(len);
|
||||
final String res = ModifiedUtf8.decode(mBuffer, new char[len], mBufferPos, len);
|
||||
mBufferPos += len;
|
||||
return res;
|
||||
} else {
|
||||
final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
|
||||
readFully(tmp, 0, len);
|
||||
return ModifiedUtf8.decode(tmp, new char[len], 0, len);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a {@link String} value with the additional signal that the given
|
||||
* value is a candidate for being canonicalized, similar to
|
||||
|
@ -30,6 +30,7 @@ import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* Optimized implementation of {@link DataOutput} which buffers data in memory
|
||||
@ -41,23 +42,38 @@ import java.util.Objects;
|
||||
public class FastDataOutput implements DataOutput, Flushable, Closeable {
|
||||
private static final int MAX_UNSIGNED_SHORT = 65_535;
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 32_768;
|
||||
|
||||
private static AtomicReference<FastDataOutput> sOutCache = new AtomicReference<>();
|
||||
|
||||
private final VMRuntime mRuntime;
|
||||
private final OutputStream mOut;
|
||||
|
||||
private final byte[] mBuffer;
|
||||
private final long mBufferPtr;
|
||||
private final int mBufferCap;
|
||||
private final boolean mUse4ByteSequence;
|
||||
|
||||
private OutputStream mOut;
|
||||
private int mBufferPos;
|
||||
|
||||
/**
|
||||
* Values that have been "interned" by {@link #writeInternedUTF(String)}.
|
||||
*/
|
||||
private HashMap<String, Short> mStringRefs = new HashMap<>();
|
||||
private final HashMap<String, Short> mStringRefs = new HashMap<>();
|
||||
|
||||
/**
|
||||
* @deprecated callers must specify {@code use4ByteSequence} so they make a
|
||||
* clear choice about working around a long-standing ART bug, as
|
||||
* described by the {@code kUtfUse4ByteSequence} comments in
|
||||
* {@code art/runtime/jni/jni_internal.cc}.
|
||||
*/
|
||||
@Deprecated
|
||||
public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
|
||||
this(out, bufferSize, true /* use4ByteSequence */);
|
||||
}
|
||||
|
||||
public FastDataOutput(@NonNull OutputStream out, int bufferSize, boolean use4ByteSequence) {
|
||||
mRuntime = VMRuntime.getRuntime();
|
||||
mOut = Objects.requireNonNull(out);
|
||||
if (bufferSize < 8) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
@ -65,6 +81,68 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
|
||||
mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
|
||||
mBufferPtr = mRuntime.addressOf(mBuffer);
|
||||
mBufferCap = mBuffer.length;
|
||||
mUse4ByteSequence = use4ByteSequence;
|
||||
|
||||
setOutput(out);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a {@link FastDataOutput} configured with the given
|
||||
* {@link OutputStream} and which encodes large code-points using 3-byte
|
||||
* sequences.
|
||||
* <p>
|
||||
* This <em>is</em> compatible with the {@link DataOutput} API contract,
|
||||
* which specifies that large code-points must be encoded with 3-byte
|
||||
* sequences.
|
||||
*/
|
||||
public static FastDataOutput obtainUsing3ByteSequences(@NonNull OutputStream out) {
|
||||
return new FastDataOutput(out, DEFAULT_BUFFER_SIZE, false /* use4ByteSequence */);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a {@link FastDataOutput} configured with the given
|
||||
* {@link OutputStream} and which encodes large code-points using 4-byte
|
||||
* sequences.
|
||||
* <p>
|
||||
* This <em>is not</em> compatible with the {@link DataOutput} API contract,
|
||||
* which specifies that large code-points must be encoded with 3-byte
|
||||
* sequences.
|
||||
*/
|
||||
public static FastDataOutput obtainUsing4ByteSequences(@NonNull OutputStream out) {
|
||||
FastDataOutput instance = sOutCache.getAndSet(null);
|
||||
if (instance != null) {
|
||||
instance.setOutput(out);
|
||||
return instance;
|
||||
}
|
||||
return new FastDataOutput(out, DEFAULT_BUFFER_SIZE, true /* use4ByteSequence */);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release a {@link FastDataOutput} to potentially be recycled. You must not
|
||||
* interact with the object after releasing it.
|
||||
*/
|
||||
public void release() {
|
||||
if (mBufferPos > 0) {
|
||||
throw new IllegalStateException("Lingering data, call flush() before releasing.");
|
||||
}
|
||||
|
||||
mOut = null;
|
||||
mBufferPos = 0;
|
||||
mStringRefs.clear();
|
||||
|
||||
if (mBufferCap == DEFAULT_BUFFER_SIZE && mUse4ByteSequence) {
|
||||
// Try to return to the cache.
|
||||
sOutCache.compareAndSet(null, this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-initializes the object for the new output.
|
||||
*/
|
||||
private void setOutput(@NonNull OutputStream out) {
|
||||
mOut = Objects.requireNonNull(out);
|
||||
mBufferPos = 0;
|
||||
mStringRefs.clear();
|
||||
}
|
||||
|
||||
private void drain() throws IOException {
|
||||
@ -83,6 +161,7 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
mOut.close();
|
||||
release();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -109,6 +188,14 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
|
||||
|
||||
@Override
|
||||
public void writeUTF(String s) throws IOException {
|
||||
if (mUse4ByteSequence) {
|
||||
writeUTFUsing4ByteSequences(s);
|
||||
} else {
|
||||
writeUTFUsing3ByteSequences(s);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeUTFUsing4ByteSequences(String s) throws IOException {
|
||||
// Attempt to write directly to buffer space if there's enough room,
|
||||
// otherwise fall back to chunking into place
|
||||
if (mBufferCap - mBufferPos < 2 + s.length()) drain();
|
||||
@ -136,6 +223,27 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
private void writeUTFUsing3ByteSequences(String s) throws IOException {
|
||||
final int len = (int) ModifiedUtf8.countBytes(s, false);
|
||||
if (len > MAX_UNSIGNED_SHORT) {
|
||||
throw new IOException("Modified UTF-8 length too large: " + len);
|
||||
}
|
||||
|
||||
// Attempt to write directly to buffer space if there's enough room,
|
||||
// otherwise fall back to chunking into place
|
||||
if (mBufferCap >= 2 + len) {
|
||||
if (mBufferCap - mBufferPos < 2 + len) drain();
|
||||
writeShort(len);
|
||||
ModifiedUtf8.encode(mBuffer, mBufferPos, s);
|
||||
mBufferPos += len;
|
||||
} else {
|
||||
final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
|
||||
ModifiedUtf8.encode(tmp, 0, s);
|
||||
writeShort(len);
|
||||
write(tmp, 0, len);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a {@link String} value with the additional signal that the given
|
||||
* value is a candidate for being canonicalized, similar to
|
||||
|
110
core/java/com/android/internal/util/ModifiedUtf8.java
Normal file
110
core/java/com/android/internal/util/ModifiedUtf8.java
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.internal.util;
|
||||
|
||||
import java.io.UTFDataFormatException;
|
||||
|
||||
/**
 * Static helpers for counting, encoding and decoding <i>modified UTF-8</i>, in which every
 * {@code char} of a string is encoded on its own using one, two or three bytes.
 *
 * <p>Because each {@code char} is handled independently, a surrogate pair is written as two
 * 3-byte sequences (six bytes in total) rather than as a single 4-byte sequence, and U+0000 is
 * written as a two-byte sequence instead of a single zero byte.
 *
 * <p>This class holds no state and cannot be instantiated.
 */
public final class ModifiedUtf8 {
    /** Largest {@code char} encoded as a single byte (U+0000 is the exception: two bytes). */
    private static final int MAX_ONE_BYTE_CHAR = 0x7F;

    /** Largest {@code char} encoded as two bytes; anything above takes three bytes. */
    private static final int MAX_TWO_BYTE_CHAR = 0x7FF;

    /** Largest byte count that can be described by an unsigned 16-bit length prefix. */
    private static final int MAX_SHORT_LENGTH = 65535;

    /**
     * Decodes a byte array containing <i>modified UTF-8</i> bytes into a string.
     *
     * <p>Note that although this method decodes the (supposedly impossible) zero byte to U+0000,
     * that's what the RI does too.
     *
     * @param in source bytes
     * @param out scratch buffer that receives the decoded chars; every decoded char consumes at
     *            least one input byte, so {@code utfSize} elements are always sufficient
     * @param offset index in {@code in} of the first byte to decode
     * @param utfSize number of bytes to decode, starting at {@code offset}
     * @return a new string holding the decoded chars
     * @throws UTFDataFormatException if a multi-byte sequence is truncated, a continuation byte
     *             is malformed, or a lead byte does not start a 1-, 2- or 3-byte sequence
     */
    public static String decode(byte[] in, char[] out, int offset, int utfSize)
            throws UTFDataFormatException {
        int count = 0;
        int s = 0;
        while (count < utfSize) {
            // The lead byte is sign-extended and then narrowed to char, so any byte with the
            // high bit set lands at or above U+0080; only its low 8 bits are inspected below.
            final char lead = (char) in[offset + count++];
            out[s] = lead;
            if (lead < '\u0080') {
                // Single-byte sequence: the value already stored in out[s] is the char.
                s++;
            } else if ((lead & 0xe0) == 0xc0) {
                // Two-byte sequence: 110xxxxx 10xxxxxx
                if (count >= utfSize) {
                    throw new UTFDataFormatException("bad second byte at " + count);
                }
                final int b = in[offset + count++];
                if ((b & 0xC0) != 0x80) {
                    throw new UTFDataFormatException("bad second byte at " + (count - 1));
                }
                out[s++] = (char) (((lead & 0x1F) << 6) | (b & 0x3F));
            } else if ((lead & 0xf0) == 0xe0) {
                // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
                if (count + 1 >= utfSize) {
                    throw new UTFDataFormatException("bad third byte at " + (count + 1));
                }
                final int b = in[offset + count++];
                final int c = in[offset + count++];
                if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
                    throw new UTFDataFormatException("bad second or third byte at " + (count - 2));
                }
                out[s++] = (char) (((lead & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
            } else {
                // Continuation byte in lead position, or a 4+ byte lead, which is not accepted.
                throw new UTFDataFormatException("bad byte at " + (count - 1));
            }
        }
        return new String(out, 0, s);
    }

    /**
     * Returns the number of bytes the modified UTF-8 representation of 's' would take. Note
     * that this is just the space for the bytes representing the characters, not the length
     * which precedes those bytes, because different callers represent the length differently,
     * as two, four, or even eight bytes. If {@code shortLength} is true, we'll throw an
     * exception if the string is too long for its length to be represented by a short.
     *
     * @param s string to measure
     * @param shortLength whether to reject strings needing more than 65535 encoded bytes
     * @return the encoded size of {@code s} in bytes
     * @throws UTFDataFormatException if {@code shortLength} is true and the running total
     *             exceeds 65535 bytes
     */
    public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
        long result = 0;
        final int length = s.length();
        for (int i = 0; i < length; ++i) {
            final char ch = s.charAt(i);
            if (ch != 0 && ch <= MAX_ONE_BYTE_CHAR) { // U+0000 uses two bytes.
                ++result;
            } else if (ch <= MAX_TWO_BYTE_CHAR) {
                result += 2;
            } else {
                result += 3;
            }
            // Checked per char so an over-long string is rejected as soon as it overflows.
            if (shortLength && result > MAX_SHORT_LENGTH) {
                throw new UTFDataFormatException("String more than 65535 UTF bytes long");
            }
        }
        return result;
    }

    /**
     * Encodes the <i>modified UTF-8</i> bytes corresponding to string {@code s} into the
     * byte array {@code dst}, starting at the given {@code offset}.
     *
     * <p>No bounds checking is done beyond the array accesses themselves; size {@code dst}
     * using {@link #countBytes(String, boolean)} before calling.
     *
     * @param dst destination array
     * @param offset index in {@code dst} at which the first encoded byte is stored
     * @param s string to encode
     */
    public static void encode(byte[] dst, int offset, String s) {
        final int length = s.length();
        for (int i = 0; i < length; i++) {
            final char ch = s.charAt(i);
            if (ch != 0 && ch <= MAX_ONE_BYTE_CHAR) { // U+0000 uses two bytes.
                dst[offset++] = (byte) ch;
            } else if (ch <= MAX_TWO_BYTE_CHAR) {
                dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6)));
                dst[offset++] = (byte) (0x80 | (0x3f & ch));
            } else {
                dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12)));
                dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6)));
                dst[offset++] = (byte) (0x80 | (0x3f & ch));
            }
        }
    }

    /** Not instantiable: all members are static. */
    private ModifiedUtf8() {
    }
}
|
@ -1,7 +1,20 @@
|
||||
{
|
||||
"presubmit": [
|
||||
{
|
||||
"name": "ScreenshotHelperTests"
|
||||
"name": "ScreenshotHelperTests",
|
||||
"file_patterns": ["ScreenshotHelper"]
|
||||
},
|
||||
{
|
||||
"name": "FrameworksCoreTests",
|
||||
"options": [
|
||||
{
|
||||
"include-filter": "android.util.XmlTest"
|
||||
},
|
||||
{
|
||||
"include-filter": "android.util.BinaryXmlTest"
|
||||
}
|
||||
],
|
||||
"file_patterns": ["Xml"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
16
core/jni/TEST_MAPPING
Normal file
16
core/jni/TEST_MAPPING
Normal file
@ -0,0 +1,16 @@
|
||||
{
|
||||
"presubmit": [
|
||||
{
|
||||
"name": "FrameworksCoreTests",
|
||||
"options": [
|
||||
{
|
||||
"include-filter": "android.util.CharsetUtilsTest"
|
||||
},
|
||||
{
|
||||
"include-filter": "com.android.internal.util.FastDataTest"
|
||||
}
|
||||
],
|
||||
"file_patterns": ["CharsetUtils|FastData"]
|
||||
}
|
||||
]
|
||||
}
|
@ -224,7 +224,7 @@ public class XmlTest {
|
||||
doVerifyRead(in);
|
||||
}
|
||||
|
||||
private static final String TEST_STRING = "com.example";
|
||||
private static final String TEST_STRING = "com☃example😀typical☃package😀name";
|
||||
private static final String TEST_STRING_EMPTY = "";
|
||||
private static final byte[] TEST_BYTES = new byte[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
|
||||
private static final byte[] TEST_BYTES_EMPTY = new byte[0];
|
||||
|
@ -23,10 +23,13 @@ import static org.junit.Assert.fail;
|
||||
import android.annotation.NonNull;
|
||||
import android.util.ExceptionUtils;
|
||||
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
||||
import libcore.util.HexEncoding;
|
||||
|
||||
import org.junit.Assume;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.runners.Parameterized.Parameters;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
@ -38,22 +41,34 @@ import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
@RunWith(Parameterized.class)
|
||||
public class FastDataTest {
|
||||
private final boolean use4ByteSequence;
|
||||
|
||||
private static final String TEST_SHORT_STRING = "a";
|
||||
private static final String TEST_LONG_STRING = "com☃example☃typical☃package☃name";
|
||||
private static final String TEST_LONG_STRING = "com☃example😀typical☃package😀name";
|
||||
private static final byte[] TEST_BYTES = TEST_LONG_STRING.getBytes(StandardCharsets.UTF_16LE);
|
||||
|
||||
@Parameters(name = "use4ByteSequence={0}")
|
||||
public static Collection<Object[]> data() {
|
||||
return Arrays.asList(new Object[][] { {true}, {false} });
|
||||
}
|
||||
|
||||
public FastDataTest(boolean use4ByteSequence) {
|
||||
this.use4ByteSequence = use4ByteSequence;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEndOfFile_Int() throws Exception {
|
||||
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
|
||||
new byte[] { 1 }), 1000)) {
|
||||
new byte[] { 1 }), 1000, use4ByteSequence)) {
|
||||
assertThrows(EOFException.class, () -> in.readInt());
|
||||
}
|
||||
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
|
||||
new byte[] { 1, 1, 1, 1 }), 1000)) {
|
||||
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
|
||||
assertEquals(1, in.readByte());
|
||||
assertThrows(EOFException.class, () -> in.readInt());
|
||||
}
|
||||
@ -62,11 +77,11 @@ public class FastDataTest {
|
||||
@Test
|
||||
public void testEndOfFile_String() throws Exception {
|
||||
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
|
||||
new byte[] { 1 }), 1000)) {
|
||||
new byte[] { 1 }), 1000, use4ByteSequence)) {
|
||||
assertThrows(EOFException.class, () -> in.readUTF());
|
||||
}
|
||||
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
|
||||
new byte[] { 1, 1, 1, 1 }), 1000)) {
|
||||
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
|
||||
assertThrows(EOFException.class, () -> in.readUTF());
|
||||
}
|
||||
}
|
||||
@ -74,12 +89,12 @@ public class FastDataTest {
|
||||
@Test
|
||||
public void testEndOfFile_Bytes_Small() throws Exception {
|
||||
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
|
||||
new byte[] { 1, 1, 1, 1 }), 1000)) {
|
||||
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
|
||||
final byte[] tmp = new byte[10];
|
||||
assertThrows(EOFException.class, () -> in.readFully(tmp));
|
||||
}
|
||||
try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
|
||||
new byte[] { 1, 1, 1, 1 }), 1000)) {
|
||||
new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
|
||||
final byte[] tmp = new byte[10_000];
|
||||
assertThrows(EOFException.class, () -> in.readFully(tmp));
|
||||
}
|
||||
@ -88,7 +103,8 @@ public class FastDataTest {
|
||||
@Test
|
||||
public void testUTF_Bounds() throws Exception {
|
||||
final char[] buf = new char[65_534];
|
||||
try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(), BOUNCE_SIZE)) {
|
||||
try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(),
|
||||
BOUNCE_SIZE, use4ByteSequence)) {
|
||||
// Writing simple string will fit fine
|
||||
Arrays.fill(buf, '!');
|
||||
final String simple = new String(buf);
|
||||
@ -100,11 +116,15 @@ public class FastDataTest {
|
||||
final String complex = new String(buf);
|
||||
assertThrows(IOException.class, () -> out.writeUTF(complex));
|
||||
assertThrows(IOException.class, () -> out.writeInternedUTF(complex));
|
||||
|
||||
out.flush();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTranscode() throws Exception {
|
||||
Assume.assumeFalse(use4ByteSequence);
|
||||
|
||||
// Verify that upstream data can be read by fast
|
||||
{
|
||||
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
|
||||
@ -113,20 +133,22 @@ public class FastDataTest {
|
||||
out.flush();
|
||||
|
||||
final FastDataInput in = new FastDataInput(
|
||||
new ByteArrayInputStream(outStream.toByteArray()), BOUNCE_SIZE);
|
||||
doTransodeRead(in);
|
||||
new ByteArrayInputStream(outStream.toByteArray()),
|
||||
BOUNCE_SIZE, use4ByteSequence);
|
||||
doTranscodeRead(in);
|
||||
}
|
||||
|
||||
// Verify that fast data can be read by upstream
|
||||
{
|
||||
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
|
||||
final FastDataOutput out = new FastDataOutput(outStream, BOUNCE_SIZE);
|
||||
final FastDataOutput out = new FastDataOutput(outStream,
|
||||
BOUNCE_SIZE, use4ByteSequence);
|
||||
doTranscodeWrite(out);
|
||||
out.flush();
|
||||
|
||||
final DataInputStream in = new DataInputStream(
|
||||
new ByteArrayInputStream(outStream.toByteArray()));
|
||||
doTransodeRead(in);
|
||||
doTranscodeRead(in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -144,7 +166,7 @@ public class FastDataTest {
|
||||
out.writeDouble(32d);
|
||||
}
|
||||
|
||||
private static void doTransodeRead(DataInput in) throws IOException {
|
||||
private static void doTranscodeRead(DataInput in) throws IOException {
|
||||
assertEquals(true, in.readBoolean());
|
||||
assertEquals(false, in.readBoolean());
|
||||
assertEquals(1, in.readByte());
|
||||
@ -225,10 +247,12 @@ public class FastDataTest {
|
||||
doBounce((out) -> {
|
||||
out.writeUTF("");
|
||||
out.writeUTF("☃");
|
||||
out.writeUTF("😀");
|
||||
out.writeUTF("example");
|
||||
}, (in) -> {
|
||||
assertEquals("", in.readUTF());
|
||||
assertEquals("☃", in.readUTF());
|
||||
assertEquals("😀", in.readUTF());
|
||||
assertEquals("example", in.readUTF());
|
||||
});
|
||||
}
|
||||
@ -263,6 +287,35 @@ public class FastDataTest {
|
||||
}, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that we encode every valid code-point identically to RI when
|
||||
* running in 3-byte mode.
|
||||
*/
|
||||
@Test
|
||||
public void testBounce_UTF_Exhaustive() throws Exception {
|
||||
Assume.assumeFalse(use4ByteSequence);
|
||||
|
||||
final ByteArrayOutputStream slowStream = new ByteArrayOutputStream();
|
||||
final DataOutput slowData = new DataOutputStream(slowStream);
|
||||
|
||||
final ByteArrayOutputStream fastStream = new ByteArrayOutputStream();
|
||||
final FastDataOutput fastData = FastDataOutput.obtainUsing3ByteSequences(fastStream);
|
||||
|
||||
for (int cp = Character.MIN_CODE_POINT; cp < Character.MAX_CODE_POINT; cp++) {
|
||||
if (Character.isValidCodePoint(cp)) {
|
||||
final String cpString = new String(Character.toChars(cp));
|
||||
slowStream.reset();
|
||||
slowData.writeUTF(cpString);
|
||||
fastStream.reset();
|
||||
fastData.writeUTF(cpString);
|
||||
fastData.flush();
|
||||
assertEquals("Bad encoding for code-point " + Integer.toHexString(cp),
|
||||
HexEncoding.encodeToString(slowStream.toByteArray()),
|
||||
HexEncoding.encodeToString(fastStream.toByteArray()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBounce_InternedUTF() throws Exception {
|
||||
doBounce((out) -> {
|
||||
@ -355,22 +408,24 @@ public class FastDataTest {
|
||||
* Verify that some common data can be written and read back, effectively
|
||||
* "bouncing" it through a serialized representation.
|
||||
*/
|
||||
private static void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
|
||||
private void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
|
||||
@NonNull ThrowingConsumer<FastDataInput> in) throws Exception {
|
||||
doBounce(out, in, BOUNCE_REPEAT);
|
||||
}
|
||||
|
||||
private static void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
|
||||
private void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
|
||||
@NonNull ThrowingConsumer<FastDataInput> in, int count) throws Exception {
|
||||
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
|
||||
final FastDataOutput outData = new FastDataOutput(outStream, BOUNCE_SIZE);
|
||||
final FastDataOutput outData = new FastDataOutput(outStream,
|
||||
BOUNCE_SIZE, use4ByteSequence);
|
||||
for (int i = 0; i < count; i++) {
|
||||
out.accept(outData);
|
||||
}
|
||||
outData.flush();
|
||||
|
||||
final ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray());
|
||||
final FastDataInput inData = new FastDataInput(inStream, BOUNCE_SIZE);
|
||||
final FastDataInput inData = new FastDataInput(inStream,
|
||||
BOUNCE_SIZE, use4ByteSequence);
|
||||
for (int i = 0; i < count; i++) {
|
||||
in.accept(inData);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user