tweak the Base64 implementation

- move the encodeInternal/decodeInternal methods into the inner
  "state" classes

- tighten up the inner loop of the encoder and decoder a bit, saving
  about 5% of time in both cases

- improve javadoc

- other little fixes

Change-Id: I72e0ce8502c664a32418cea04636ccdbf4fec17c
This commit is contained in:
Doug Zongker
2010-02-14 13:48:49 -08:00
parent 5b31fdae60
commit 9df2ffd420
4 changed files with 495 additions and 476 deletions

View File

@ -16,9 +16,13 @@
package android.util.base64;
import java.io.UnsupportedEncodingException;
/**
* Utilities for encoding and decoding the Base64 encoding. See RFCs
* 2045 and 3548.
* Utilities for encoding and decoding the Base64 representation of
* binary data. See RFCs <a
* href="http://www.ietf.org/rfc/rfc2045.txt">2045</a> and <a
* href="http://www.ietf.org/rfc/rfc3548.txt">3548</a>.
*/
public class Base64 {
/**
@ -27,97 +31,79 @@ public class Base64 {
public static final int DEFAULT = 0;
/**
* Encoder flag bit to indicate you want the padding '='
* characters at the end (if any) to be omitted.
* Encoder flag bit to omit the padding '=' characters at the end
* of the output (if any).
*/
public static final int NO_PADDING = 1;
/**
* Encoder flag bit to indicate you want all line terminators to
* be omitted (ie, the output will be on one long line).
* Encoder flag bit to omit all line terminators (i.e., the output
* will be on one long line).
*/
public static final int NO_WRAP = 2;
/**
* Encoder flag bit to indicate you want lines to be ended with
* CRLF instead of just LF. Has no effect if {@code NO_WRAP} is
* specified as well.
* Encoder flag bit to indicate lines should be terminated with a
* CRLF pair instead of just an LF. Has no effect if {@code
* NO_WRAP} is specified as well.
*/
public static final int CRLF = 4;
/**
* Encoder/decoder flag bit to indicate using the "web safe"
* variant of Base64 (see RFC 3548 section 4) where '-' and '_'
* are used in place of '+' and '/'.
* Encoder/decoder flag bit to indicate using the "URL and
* filename safe" variant of Base64 (see RFC 3548 section 4) where
* {@code -} and {@code _} are used in place of {@code +} and
* {@code /}.
*/
public static final int WEB_SAFE = 8;
public static final int URL_SAFE = 8;
/**
* Flag to pass to Base64OutputStream to indicate that it should
* not close the output stream it is wrapping when it itself is
* closed.
* Flag to pass to {@link Base64OutputStream} to indicate that it
* should not close the output stream it is wrapping when it
* itself is closed.
*/
public static final int NO_CLOSE = 16;
// --------------------------------------------------------
// shared code
// --------------------------------------------------------
/**
* Common base for the Base64 Encoder and Decoder state objects: holds
* the output buffer and exposes a single process() entry point so
* callers can drive either direction through the same interface.
*/
/* package */ static abstract class Coder {
/** Destination buffer for coded bytes; supplied by the caller. */
public byte[] output;
/** Number of bytes written to {@link #output} by the most recent call to process(). */
public int op;
/**
* Encode/decode another block of input data. this.output is
* provided by the caller, and must be big enough to hold all
* the coded data. On exit, this.op will be set to the length
* of the coded data.
*
* @param input array holding the bytes to process
* @param offset index in input of the first byte to process
* @param len number of bytes of input to process
* @param finish true if this is the final call to process for
* this object. Will finalize the coder state and
* include any final bytes in the output.
*
* @return true if the input so far is good; false if some
* error has been detected in the input stream.
*/
public abstract boolean process(byte[] input, int offset, int len, boolean finish);
/**
* @param len the number of input bytes that will be passed to process()
*
* @return the maximum number of bytes a call to process()
* could produce for the given number of input bytes. This may
* be an overestimate.
*/
public abstract int maxOutputSize(int len);
}
// --------------------------------------------------------
// decoding
// --------------------------------------------------------
/**
* Lookup table for turning bytes into their position in the
* Base64 alphabet.
*/
private static final int DECODE[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/**
* Decode lookup table for the "web safe" variant (RFC 3548
* sec. 4) where - and _ replace + and /.
*/
private static final int DECODE_WEBSAFE[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/** Non-data values in the DECODE arrays. */
private static final int SKIP = -1;
private static final int EQUALS = -2;
/**
* Decode the Base64-encoded data in input and return the data in
* a new byte array.
*
* The padding '=' characters at the end are considered optional, but
* <p>The padding '=' characters at the end are considered optional, but
* if any are present, there must be the correct number of them.
*
* @param str the input String to decode, which is converted to
@ -136,7 +122,7 @@ public class Base64 {
* Decode the Base64-encoded data in input and return the data in
* a new byte array.
*
* The padding '=' characters at the end are considered optional, but
* <p>The padding '=' characters at the end are considered optional, but
* if any are present, there must be the correct number of them.
*
* @param input the input array to decode
@ -154,7 +140,7 @@ public class Base64 {
* Decode the Base64-encoded data in input and return the data in
* a new byte array.
*
* The padding '=' characters at the end are considered optional, but
* <p>The padding '=' characters at the end are considered optional, but
* if any are present, there must be the correct number of them.
*
* @param input the data to decode
@ -169,121 +155,172 @@ public class Base64 {
public static byte[] decode(byte[] input, int offset, int len, int flags) {
// Allocate space for the most data the input could represent.
// (It could contain less if it contains whitespace, etc.)
DecoderState state = new DecoderState(flags, new byte[len*3/4]);
Decoder decoder = new Decoder(flags, new byte[len*3/4]);
if (!decodeInternal(input, offset, len, state, true)) {
if (!decoder.process(input, offset, len, true)) {
throw new IllegalArgumentException("bad base-64");
}
// Maybe we got lucky and allocated exactly enough output space.
if (state.op == state.output.length) {
return state.output;
if (decoder.op == decoder.output.length) {
return decoder.output;
}
// Need to shorten the array, so allocate a new one of the
// right size and copy.
byte[] temp = new byte[state.op];
System.arraycopy(state.output, 0, temp, 0, state.op);
byte[] temp = new byte[decoder.op];
System.arraycopy(decoder.output, 0, temp, 0, decoder.op);
return temp;
}
/* package */ static class DecoderState {
public byte[] output;
public int op;
/* package */ static class Decoder extends Coder {
/**
* Lookup table for turning bytes into their position in the
* Base64 alphabet.
*/
private static final int DECODE[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
public int state; // state number (0 to 6)
public int value;
/**
* Decode lookup table for the "web safe" variant (RFC 3548
* sec. 4) where - and _ replace + and /.
*/
private static final int DECODE_WEBSAFE[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
final public int[] alphabet;
/** Non-data values in the DECODE arrays. */
private static final int SKIP = -1;
private static final int EQUALS = -2;
public DecoderState(int flags, byte[] output) {
/**
* States 0-3 are reading through the next input tuple.
* State 4 is having read one '=' and expecting exactly
* one more.
* State 5 is expecting no more data or padding characters
* in the input.
* State 6 is the error state; an error has been detected
* in the input and no future input can "fix" it.
*/
private int state; // state number (0 to 6)
private int value;
final private int[] alphabet;
public Decoder(int flags, byte[] output) {
this.output = output;
alphabet = ((flags & WEB_SAFE) == 0) ? DECODE : DECODE_WEBSAFE;
alphabet = ((flags & URL_SAFE) == 0) ? DECODE : DECODE_WEBSAFE;
state = 0;
value = 0;
}
}
/**
* Decode another block of input data.
*
* @param dstate a DecoderState object whose (caller-provided)
* output array is big enough to hold all the decoded data.
* On return, dstate.op will be set to the length of the
* decoded data.
* @param finish true if this is the final call to decodeInternal
* with the given DecoderState object. Will finalize the
* decoder state and include any final bytes in the output.
*
* @return true if the state machine is still healthy. false if
* bad base-64 data has been detected in the input stream.
*/
/**
* @return an overestimate for the number of bytes {@code
* len} bytes could decode to.
*/
public int maxOutputSize(int len) {
return len * 3/4 + 10;
}
/* package */ static boolean decodeInternal(
byte[] input, int offset, int len, final DecoderState dstate, boolean finish) {
if (dstate.state == 6) return false;
/**
* Decode another block of input data.
*
* @return true if the state machine is still healthy. false if
* bad base-64 data has been detected in the input stream.
*/
public boolean process(byte[] input, int offset, int len, boolean finish) {
if (this.state == 6) return false;
int state = dstate.state;
int value = dstate.value;
final int[] decode = dstate.alphabet;
final byte[] output = dstate.output;
int op = 0;
int p = offset;
len += offset;
int p = offset;
len += offset;
// Using local variables makes the decoder about 12%
// faster than if we manipulate the member variables in
// the loop. (Even alphabet makes a measurable
// difference, which is somewhat surprising to me since
// the member variable is final.)
int state = this.state;
int value = this.value;
int op = 0;
final byte[] output = this.output;
final int[] alphabet = this.alphabet;
while (p < len) {
while (p < len) {
// Try the fast path: we're starting a new tuple and the
// next four bytes of the input stream are all data
// bytes. This corresponds to going through states
// 0-1-2-3-0. We expect to use this method for most of
// the data.
//
// If any of the next four bytes of input are non-data
// (whitespace, etc.), value will end up negative. (All
// the non-data values in decode are small negative
// numbers, so shifting any of them up and or'ing them
// together will result in a value with its top bit set.)
//
// You can remove this whole block and the output should
// be the same, just slower.
if (state == 0) {
while (p+4 <= len &&
(value = ((alphabet[input[p] & 0xff] << 18) |
(alphabet[input[p+1] & 0xff] << 12) |
(alphabet[input[p+2] & 0xff] << 6) |
(alphabet[input[p+3] & 0xff]))) >= 0) {
output[op+2] = (byte) value;
output[op+1] = (byte) (value >> 8);
output[op] = (byte) (value >> 16);
op += 3;
p += 4;
}
if (p >= len) break;
}
// Try the fast path: we're starting a new tuple and the
// next four bytes of the input stream are all data
// bytes. This corresponds to going through states
// 0-1-2-3-0. We expect to use this method for most of
// the data.
//
// If any of the next four bytes of input are non-data
// (whitespace, etc.), value will end up negative. (All
// the non-data values in decode are small negative
// numbers, so shifting any of them up and or'ing them
// together will result in a value with its top bit set.)
//
// You can remove this whole block and the output should
// be the same, just slower.
if (state == 0 && p+4 <= len &&
(value = ((decode[input[p] & 0xff] << 18) |
(decode[input[p+1] & 0xff] << 12) |
(decode[input[p+2] & 0xff] << 6) |
(decode[input[p+3] & 0xff]))) >= 0) {
output[op+2] = (byte) value;
output[op+1] = (byte) (value >> 8);
output[op] = (byte) (value >> 16);
op += 3;
p += 4;
continue;
}
// The fast path isn't available -- either we've read a
// partial tuple, or the next four input bytes aren't all
// data, or whatever. Fall back to the slower state
// machine implementation.
// The fast path isn't available -- either we've read a
// partial tuple, or the next four input bytes aren't all
// data, or whatever. Fall back to the slower state
// machine implementation.
//
// States 0-3 are reading through the next input tuple.
// State 4 is having read one '=' and expecting exactly
// one more.
// State 5 is expecting no more data or padding characters
// in the input.
// State 6 is the error state; an error has been detected
// in the input and no future input can "fix" it.
int d = alphabet[input[p++] & 0xff];
int d = decode[input[p++] & 0xff];
switch (state) {
switch (state) {
case 0:
if (d >= 0) {
value = d;
++state;
} else if (d != SKIP) {
dstate.state = 6;
this.state = 6;
return false;
}
break;
@ -293,7 +330,7 @@ public class Base64 {
value = (value << 6) | d;
++state;
} else if (d != SKIP) {
dstate.state = 6;
this.state = 6;
return false;
}
break;
@ -308,7 +345,7 @@ public class Base64 {
output[op++] = (byte) (value >> 4);
state = 4;
} else if (d != SKIP) {
dstate.state = 6;
this.state = 6;
return false;
}
break;
@ -330,7 +367,7 @@ public class Base64 {
op += 2;
state = 5;
} else if (d != SKIP) {
dstate.state = 6;
this.state = 6;
return false;
}
break;
@ -339,41 +376,40 @@ public class Base64 {
if (d == EQUALS) {
++state;
} else if (d != SKIP) {
dstate.state = 6;
this.state = 6;
return false;
}
break;
case 5:
if (d != SKIP) {
dstate.state = 6;
this.state = 6;
return false;
}
break;
}
}
}
if (!finish) {
// We're out of input, but a future call could provide
// more. Return the output we've produced on this call
// and save the current state of the state machine.
dstate.state = state;
dstate.value = value;
dstate.op = op;
return true;
}
if (!finish) {
// We're out of input, but a future call could provide
// more.
this.state = state;
this.value = value;
this.op = op;
return true;
}
// Done reading input. Now figure out where we are left in
// the state machine and finish up.
// Done reading input. Now figure out where we are left in
// the state machine and finish up.
switch (state) {
switch (state) {
case 0:
// Output length is a multiple of three. Fine.
break;
case 1:
// Read one extra input byte, which isn't enough to
// make another output byte. Illegal.
dstate.state = 6;
this.state = 6;
return false;
case 2:
// Read two extra input bytes, enough to emit 1 more
@ -383,65 +419,29 @@ public class Base64 {
case 3:
// Read three extra input bytes, enough to emit 2 more
// output bytes. Fine.
output[op+1] = (byte) (value >> 2);
output[op] = (byte) (value >> 10);
op += 2;
output[op++] = (byte) (value >> 10);
output[op++] = (byte) (value >> 2);
break;
case 4:
// Read one padding '=' when we expected 2. Illegal.
dstate.state = 6;
this.state = 6;
return false;
case 5:
// Read all the padding '='s we expected and no more.
// Fine.
break;
}
}
dstate.op = op;
return true;
this.state = state;
this.op = op;
return true;
}
}
// --------------------------------------------------------
// encoding
// --------------------------------------------------------
/**
* Emit a new line every this many output tuples. Corresponds to
* a 76-character line length (the maximum allowable according to
* RFC 2045).
*/
private static final int LINE_GROUPS = 19;
/**
* Lookup table for turning Base64 alphabet positions (6 bits)
* into output bytes.
*/
private static final byte ENCODE[] = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', '0', '1', '2', '3',
'4', '5', '6', '7', '8', '9', '+', '/',
};
/**
* Lookup table for turning Base64 alphabet positions (6 bits)
* into output bytes.
*/
private static final byte ENCODE_WEBSAFE[] = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', '0', '1', '2', '3',
'4', '5', '6', '7', '8', '9', '-', '_',
};
/**
* Base64-encode the given data and return a newly allocated
* String with the result.
@ -452,7 +452,12 @@ public class Base64 {
* adheres to RFC 2045.
*/
public static String encodeToString(byte[] input, int flags) {
return new String(encode(input, flags));
try {
return new String(encode(input, flags), "US-ASCII");
} catch (UnsupportedEncodingException e) {
// US-ASCII is guaranteed to be available.
throw new AssertionError(e);
}
}
/**
@ -468,7 +473,12 @@ public class Base64 {
* adheres to RFC 2045.
*/
public static String encodeToString(byte[] input, int offset, int len, int flags) {
return new String(encode(input, offset, len, flags));
try {
return new String(encode(input, offset, len, flags), "US-ASCII");
} catch (UnsupportedEncodingException e) {
// US-ASCII is guaranteed to be available.
throw new AssertionError(e);
}
}
/**
@ -497,13 +507,13 @@ public class Base64 {
* adheres to RFC 2045.
*/
public static byte[] encode(byte[] input, int offset, int len, int flags) {
EncoderState state = new EncoderState(flags, null);
Encoder encoder = new Encoder(flags, null);
// Compute the exact length of the array we will produce.
int output_len = len / 3 * 4;
// Account for the tail of the data and the padding bytes, if any.
if (state.do_padding) {
if (encoder.do_padding) {
if (len % 3 > 0) {
output_len += 4;
}
@ -516,190 +526,215 @@ public class Base64 {
}
// Account for the newlines, if any.
if (state.do_newline && len > 0) {
output_len += (((len-1) / (3 * LINE_GROUPS)) + 1) * (state.do_cr ? 2 : 1);
if (encoder.do_newline && len > 0) {
output_len += (((len-1) / (3 * Encoder.LINE_GROUPS)) + 1) *
(encoder.do_cr ? 2 : 1);
}
state.output = new byte[output_len];
encodeInternal(input, offset, len, state, true);
encoder.output = new byte[output_len];
encoder.process(input, offset, len, true);
assert state.op == output_len;
assert encoder.op == output_len;
return state.output;
return encoder.output;
}
/* package */ static class EncoderState {
public byte[] output;
public int op;
/* package */ static class Encoder extends Coder {
/**
* Emit a new line every this many output tuples. Corresponds to
* a 76-character line length (the maximum allowable according to
* <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>).
*/
public static final int LINE_GROUPS = 19;
final public byte[] tail;
public int tailLen;
public int count;
/**
* Lookup table for turning Base64 alphabet positions (6 bits)
* into output bytes.
*/
private static final byte ENCODE[] = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
};
/**
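* Lookup table for turning Base64 alphabet positions (6 bits)
* into output bytes, using the "URL and filename safe" alphabet
* where {@code -} and {@code _} replace {@code +} and {@code /}.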
*/
private static final byte ENCODE_WEBSAFE[] = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
};
final private byte[] tail;
/* package */ int tailLen;
private int count;
final public boolean do_padding;
final public boolean do_newline;
final public boolean do_cr;
final public byte[] alphabet;
final private byte[] alphabet;
public EncoderState(int flags, byte[] output) {
public Encoder(int flags, byte[] output) {
this.output = output;
do_padding = (flags & NO_PADDING) == 0;
do_newline = (flags & NO_WRAP) == 0;
do_cr = (flags & CRLF) != 0;
alphabet = ((flags & WEB_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE;
alphabet = ((flags & URL_SAFE) == 0) ? ENCODE : ENCODE_WEBSAFE;
tail = new byte[2];
tailLen = 0;
count = do_newline ? LINE_GROUPS : -1;
}
}
/**
* Encode another block of input data.
*
* @param estate an EncoderState object whose (caller-provided)
* output array is big enough to hold all the encoded data.
* On return, estate.op will be set to the length of the
* encoded data.
* @param finish true if this is the final call to encodeInternal
* with the given EncoderState object. Will finalize the
* encoder state and include any final bytes in the output.
*/
static void encodeInternal(byte[] input, int offset, int len,
final EncoderState estate, boolean finish) {
final boolean do_cr = estate.do_cr;
final boolean do_newline = estate.do_newline;
final boolean do_padding = estate.do_padding;
final byte[] output = estate.output;
int op = 0;
int p = offset;
len += offset;
int v = -1;
int count = estate.count;
// First we need to concatenate the tail of the previous call
// with any input bytes available now and see if we can empty
// the tail.
switch (estate.tailLen) {
case 0:
// There was no tail.
break;
case 1:
if (p+2 <= len) {
// A 1-byte tail with at least 2 bytes of
// input available now.
v = ((estate.tail[0] & 0xff) << 16) |
((input[p++] & 0xff) << 8) |
(input[p++] & 0xff);
estate.tailLen = 0;
};
break;
case 2:
if (p+1 <= len) {
// A 2-byte tail with at least 1 byte of input.
v = ((estate.tail[0] & 0xff) << 16) |
((estate.tail[1] & 0xff) << 8) |
(input[p++] & 0xff);
estate.tailLen = 0;
}
break;
/**
* @return an overestimate for the number of bytes {@code
* len} bytes could encode to.
*/
public int maxOutputSize(int len) {
return len * 8/5 + 10;
}
if (v != -1) {
output[op++] = estate.alphabet[(v >> 18) & 0x3f];
output[op++] = estate.alphabet[(v >> 12) & 0x3f];
output[op++] = estate.alphabet[(v >> 6) & 0x3f];
output[op++] = estate.alphabet[v & 0x3f];
if (--count == 0) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
count = LINE_GROUPS;
public boolean process(byte[] input, int offset, int len, boolean finish) {
// Using local variables makes the encoder about 9% faster.
final byte[] alphabet = this.alphabet;
final byte[] output = this.output;
int op = 0;
int count = this.count;
int p = offset;
len += offset;
int v = -1;
// First we need to concatenate the tail of the previous call
// with any input bytes available now and see if we can empty
// the tail.
switch (tailLen) {
case 0:
// There was no tail.
break;
case 1:
if (p+2 <= len) {
// A 1-byte tail with at least 2 bytes of
// input available now.
v = ((tail[0] & 0xff) << 16) |
((input[p++] & 0xff) << 8) |
(input[p++] & 0xff);
tailLen = 0;
};
break;
case 2:
if (p+1 <= len) {
// A 2-byte tail with at least 1 byte of input.
v = ((tail[0] & 0xff) << 16) |
((tail[1] & 0xff) << 8) |
(input[p++] & 0xff);
tailLen = 0;
}
break;
}
}
// At this point either there is no tail, or there are fewer
// than 3 bytes of input available.
// The main loop, turning 3 input bytes into 4 output bytes on
// each iteration.
while (p+3 <= len) {
v = ((input[p++] & 0xff) << 16) |
((input[p++] & 0xff) << 8) |
(input[p++] & 0xff);
output[op++] = estate.alphabet[(v >> 18) & 0x3f];
output[op++] = estate.alphabet[(v >> 12) & 0x3f];
output[op++] = estate.alphabet[(v >> 6) & 0x3f];
output[op++] = estate.alphabet[v & 0x3f];
if (--count == 0) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
count = LINE_GROUPS;
}
}
if (finish) {
// Finish up the tail of the input. Note that we need to
// consume any bytes in estate.tail before any bytes
// remaining in input; there should be at most two bytes
// total.
if (p-estate.tailLen == len-1) {
int t = 0;
v = ((estate.tailLen > 0 ? estate.tail[t++] : input[p++]) & 0xff) << 4;
estate.tailLen -= t;
output[op++] = estate.alphabet[(v >> 6) & 0x3f];
output[op++] = estate.alphabet[v & 0x3f];
if (do_padding) {
output[op++] = '=';
output[op++] = '=';
if (v != -1) {
output[op++] = alphabet[(v >> 18) & 0x3f];
output[op++] = alphabet[(v >> 12) & 0x3f];
output[op++] = alphabet[(v >> 6) & 0x3f];
output[op++] = alphabet[v & 0x3f];
if (--count == 0) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
count = LINE_GROUPS;
}
if (do_newline) {
}
// At this point either there is no tail, or there are fewer
// than 3 bytes of input available.
// The main loop, turning 3 input bytes into 4 output bytes on
// each iteration.
while (p+3 <= len) {
v = ((input[p] & 0xff) << 16) |
((input[p+1] & 0xff) << 8) |
(input[p+2] & 0xff);
output[op] = alphabet[(v >> 18) & 0x3f];
output[op+1] = alphabet[(v >> 12) & 0x3f];
output[op+2] = alphabet[(v >> 6) & 0x3f];
output[op+3] = alphabet[v & 0x3f];
p += 3;
op += 4;
if (--count == 0) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
count = LINE_GROUPS;
}
}
if (finish) {
// Finish up the tail of the input. Note that we need to
// consume any bytes in tail before any bytes
// remaining in input; there should be at most two bytes
// total.
if (p-tailLen == len-1) {
int t = 0;
v = ((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 4;
tailLen -= t;
output[op++] = alphabet[(v >> 6) & 0x3f];
output[op++] = alphabet[v & 0x3f];
if (do_padding) {
output[op++] = '=';
output[op++] = '=';
}
if (do_newline) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
}
} else if (p-tailLen == len-2) {
int t = 0;
v = (((tailLen > 1 ? tail[t++] : input[p++]) & 0xff) << 10) |
(((tailLen > 0 ? tail[t++] : input[p++]) & 0xff) << 2);
tailLen -= t;
output[op++] = alphabet[(v >> 12) & 0x3f];
output[op++] = alphabet[(v >> 6) & 0x3f];
output[op++] = alphabet[v & 0x3f];
if (do_padding) {
output[op++] = '=';
}
if (do_newline) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
}
} else if (do_newline && op > 0 && count != LINE_GROUPS) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
}
} else if (p-estate.tailLen == len-2) {
int t = 0;
v = (((estate.tailLen > 1 ? estate.tail[t++] : input[p++]) & 0xff) << 10) |
(((estate.tailLen > 0 ? estate.tail[t++] : input[p++]) & 0xff) << 2);
estate.tailLen -= t;
output[op++] = estate.alphabet[(v >> 12) & 0x3f];
output[op++] = estate.alphabet[(v >> 6) & 0x3f];
output[op++] = estate.alphabet[v & 0x3f];
if (do_padding) {
output[op++] = '=';
assert tailLen == 0;
assert p == len;
} else {
// Save the leftovers in tail to be consumed on the next
// call to process().
if (p == len-1) {
tail[tailLen++] = input[p];
} else if (p == len-2) {
tail[tailLen++] = input[p];
tail[tailLen++] = input[p+1];
}
if (do_newline) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
}
} else if (do_newline && op > 0 && count != LINE_GROUPS) {
if (do_cr) output[op++] = '\r';
output[op++] = '\n';
}
assert estate.tailLen == 0;
assert p == len;
} else {
// Save the leftovers in tail to be consumed on the next
// call to encodeInternal.
this.op = op;
this.count = count;
if (p == len-1) {
estate.tail[estate.tailLen++] = input[p];
} else if (p == len-2) {
estate.tail[estate.tailLen++] = input[p];
estate.tail[estate.tailLen++] = input[p+1];
}
return true;
}
estate.op = op;
estate.count = count;
}
private Base64() { } // don't instantiate

View File

@ -25,16 +25,13 @@ import java.io.InputStream;
* it.
*/
public class Base64InputStream extends FilterInputStream {
private final boolean encode;
private final Base64.EncoderState estate;
private final Base64.DecoderState dstate;
private final Base64.Coder coder;
private static byte[] EMPTY = new byte[0];
private static final int BUFFER_SIZE = 2048;
private boolean eof;
private byte[] inputBuffer;
private byte[] outputBuffer;
private int outputStart;
private int outputEnd;
@ -63,22 +60,14 @@ public class Base64InputStream extends FilterInputStream {
*/
public Base64InputStream(InputStream in, int flags, boolean encode) {
super(in);
this.encode = encode;
eof = false;
inputBuffer = new byte[BUFFER_SIZE];
if (encode) {
// len*8/5+10 is an overestimate of the most bytes the
// encoder can produce for len bytes of input.
outputBuffer = new byte[BUFFER_SIZE * 8/5 + 10];
estate = new Base64.EncoderState(flags, outputBuffer);
dstate = null;
coder = new Base64.Encoder(flags, null);
} else {
// len*3/4+10 is an overestimate of the most bytes the
// decoder can produce for len bytes of input.
outputBuffer = new byte[BUFFER_SIZE * 3/4 + 10];
estate = null;
dstate = new Base64.DecoderState(flags, outputBuffer);
coder = new Base64.Decoder(flags, null);
}
coder.output = new byte[coder.maxOutputSize(BUFFER_SIZE)];
outputStart = 0;
outputEnd = 0;
}
@ -123,7 +112,7 @@ public class Base64InputStream extends FilterInputStream {
if (outputStart >= outputEnd) {
    // Nothing left in the coder's output buffer: end of stream.
    return -1;
} else {
    // InputStream.read() must return the byte as an int in 0..255.
    // Without the mask a byte >= 0x80 is sign-extended to a negative
    // int, and 0xFF in particular becomes -1, which callers would
    // mistake for end of stream.
    return coder.output[outputStart++] & 0xff;
}
}
@ -135,36 +124,30 @@ public class Base64InputStream extends FilterInputStream {
return -1;
}
int bytes = Math.min(len, outputEnd-outputStart);
System.arraycopy(outputBuffer, outputStart, b, off, bytes);
System.arraycopy(coder.output, outputStart, b, off, bytes);
outputStart += bytes;
return bytes;
}
/**
* Read data from the input stream into inputBuffer, then
* decode/encode it into the empty outputBuffer, and reset the
* decode/encode it into the empty coder.output, and reset the
* outputStart and outputEnd pointers.
*/
private void refill() throws IOException {
if (eof) return;
int bytesRead = in.read(inputBuffer);
if (encode) {
if (bytesRead == -1) {
eof = true;
Base64.encodeInternal(EMPTY, 0, 0, estate, true);
} else {
Base64.encodeInternal(inputBuffer, 0, bytesRead, estate, false);
}
outputEnd = estate.op;
boolean success;
if (bytesRead == -1) {
eof = true;
success = coder.process(EMPTY, 0, 0, true);
} else {
if (bytesRead == -1) {
eof = true;
Base64.decodeInternal(EMPTY, 0, 0, dstate, true);
} else {
Base64.decodeInternal(inputBuffer, 0, bytesRead, dstate, false);
}
outputEnd = dstate.op;
success = coder.process(inputBuffer, 0, bytesRead, false);
}
if (!success) {
throw new IOException("bad base-64");
}
outputEnd = coder.op;
outputStart = 0;
}
}

View File

@ -25,9 +25,7 @@ import java.io.OutputStream;
* it, writing the resulting data to another OutputStream.
*/
public class Base64OutputStream extends FilterOutputStream {
private final boolean encode;
private final Base64.EncoderState estate;
private final Base64.DecoderState dstate;
private final Base64.Coder coder;
private final int flags;
private byte[] buffer = null;
@ -62,13 +60,10 @@ public class Base64OutputStream extends FilterOutputStream {
public Base64OutputStream(OutputStream out, int flags, boolean encode) {
super(out);
this.flags = flags;
this.encode = encode;
if (encode) {
estate = new Base64.EncoderState(flags, null);
dstate = null;
coder = new Base64.Encoder(flags, null);
} else {
estate = null;
dstate = new Base64.DecoderState(flags, null);
coder = new Base64.Decoder(flags, null);
}
}
@ -107,12 +102,28 @@ public class Base64OutputStream extends FilterOutputStream {
}
public void close() throws IOException {
flushBuffer();
internalWrite(EMPTY, 0, 0, true);
if ((flags & Base64.NO_CLOSE) == 0) {
out.close();
} else {
out.flush();
IOException thrown = null;
try {
flushBuffer();
internalWrite(EMPTY, 0, 0, true);
} catch (IOException e) {
thrown = e;
}
try {
if ((flags & Base64.NO_CLOSE) == 0) {
out.close();
} else {
out.flush();
}
} catch (IOException e) {
if (thrown != null) {
thrown = e;
}
}
if (thrown != null) {
throw thrown;
}
}
@ -123,21 +134,11 @@ public class Base64OutputStream extends FilterOutputStream {
* encoder/decoder state to be finalized.
*/
private void internalWrite(byte[] b, int off, int len, boolean finish) throws IOException {
if (encode) {
// len*8/5+10 is an overestimate of the most bytes the
// encoder can produce for len bytes of input.
estate.output = embiggen(estate.output, len*8/5+10);
Base64.encodeInternal(b, off, len, estate, finish);
out.write(estate.output, 0, estate.op);
} else {
// len*3/4+10 is an overestimate of the most bytes the
// decoder can produce for len bytes of input.
dstate.output = embiggen(dstate.output, len*3/4+10);
if (!Base64.decodeInternal(b, off, len, dstate, finish)) {
throw new IOException("bad base-64");
}
out.write(dstate.output, 0, dstate.op);
coder.output = embiggen(coder.output, coder.maxOutputSize(len));
if (!coder.process(b, off, len, finish)) {
throw new IOException("bad base-64");
}
out.write(coder.output, 0, coder.op);
}
/**