1 package org.codehaus.plexus.util;
2
3 /*
4 * Copyright The Codehaus Foundation.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
/**
 * Provides Base64 encoding and decoding as defined by RFC 2045.
 * <p>
 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
 * </p>
 * <p>
 * Decoding is lenient: bytes outside the Base64 alphabet are skipped, and malformed input (missing or misplaced
 * padding, dangling characters) never raises an exception.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @author Apache Software Foundation
 * @since 1.0-dev
 *
 */
public class Base64
{

    //
    // Source Id: Base64.java 161350 2005-04-14 20:39:46Z ggregory
    //

    /**
     * Chunk size per RFC 2045 section 6.8.
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     * <p>
     * Spelled out as explicit byte values so that it does not depend on the platform default charset.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = { (byte) '\r', (byte) '\n' };

    /**
     * The base length: size of the decoding table {@link #base64Alphabet}.
     */
    static final int BASELENGTH = 255;

    /**
     * Lookup length: number of characters in the Base64 alphabet.
     */
    static final int LOOKUPLENGTH = 64;

    /**
     * Number of bits in a byte. Not used by the current implementation; kept because it is package-visible.
     */
    static final int EIGHTBIT = 8;

    /**
     * Number of bits in two bytes. Not used by the current implementation; kept because it is package-visible.
     */
    static final int SIXTEENBIT = 16;

    /**
     * Number of bits in a three-byte group. Not used by the current implementation; kept because it is
     * package-visible.
     */
    static final int TWENTYFOURBITGROUP = 24;

    /**
     * Number of characters in an encoded group. Not used by the current implementation; kept because it is
     * package-visible.
     */
    static final int FOURBYTE = 4;

    /**
     * Sign bit mask of a byte. Not used by the current implementation; kept because it is package-visible.
     */
    static final int SIGN = -128;

    /**
     * Byte used to pad output.
     */
    static final byte PAD = (byte) '=';

    /**
     * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
     * indices.
     * <p>
     * For example, <code>base64Alphabet['+']</code> returns <code>62</code>.
     * </p>
     * <p>
     * The value of undefined encodings is <code>-1</code>.
     * </p>
     */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /**
     * <p>
     * Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
     * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
     * <code>/</code>.
     * </p>
     * <p>
     * This array is accessed by using 6-bit values as indices.
     * </p>
     * <p>
     * For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.
     * </p>
     */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populating the lookup and character arrays
    static
    {
        for ( int i = 0; i < 26; i++ )
        {
            lookUpBase64Alphabet[i] = (byte) ( 'A' + i );
            lookUpBase64Alphabet[26 + i] = (byte) ( 'a' + i );
        }
        for ( int i = 0; i < 10; i++ )
        {
            lookUpBase64Alphabet[52 + i] = (byte) ( '0' + i );
        }
        lookUpBase64Alphabet[62] = (byte) '+';
        lookUpBase64Alphabet[63] = (byte) '/';

        // The decoding table is the inverse of the encoding table; everything else is marked invalid.
        for ( int i = 0; i < BASELENGTH; i++ )
        {
            base64Alphabet[i] = (byte) -1;
        }
        for ( int value = 0; value < LOOKUPLENGTH; value++ )
        {
            base64Alphabet[lookUpBase64Alphabet[value]] = (byte) value;
        }
    }

    /**
     * Returns whether or not the <code>octet</code> is in the base 64 alphabet. The padding character counts as a
     * member.
     *
     * @param octet The value to test
     * @return <code>true</code> if the value is defined in the base 64 alphabet, <code>false</code> otherwise.
     */
    private static boolean isBase64( byte octet )
    {
        if ( octet == PAD )
        {
            return true;
        }
        // Negative bytes (values above 127) are never part of the alphabet and must not be used as an index.
        return octet >= 0 && base64Alphabet[octet] != -1;
    }

    /**
     * Tests whether the given byte is one of the whitespace characters dropped by
     * {@link #discardWhitespace(byte[])}: space, line feed, carriage return or tab.
     *
     * @param octet The value to test
     * @return <code>true</code> if the value is whitespace, <code>false</code> otherwise.
     */
    private static boolean isWhitespace( byte octet )
    {
        return octet == (byte) ' ' || octet == (byte) '\n' || octet == (byte) '\r' || octet == (byte) '\t';
    }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Whitespace
     * (space, tab, CR, LF) is ignored.
     *
     * @param arrayOctect byte array to test
     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
     *         false, otherwise
     * @throws NullPointerException if <code>arrayOctect</code> is <code>null</code>
     */
    public static boolean isArrayByteBase64( byte[] arrayOctect )
    {
        for ( byte octet : arrayOctect )
        {
            if ( !isWhitespace( octet ) && !isBase64( octet ) )
            {
                return false;
            }
        }
        // An empty (or whitespace-only) array is considered valid.
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64( byte[] binaryData )
    {
        return encodeBase64( binaryData, false );
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked( byte[] binaryData )
    {
        return encodeBase64( binaryData, true );
    }

    /**
     * Decodes a byte[] containing characters in the Base64 alphabet.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode( byte[] pArray )
    {
        return decodeBase64( pArray );
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * When chunking, every line - including the last one - is terminated by {@link #CHUNK_SEPARATOR}. Empty input
     * yields an empty array in both modes.
     * </p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws NullPointerException if <code>binaryData</code> is <code>null</code>
     * @throws IllegalArgumentException if the encoded output would not fit into a single array
     */
    public static byte[] encodeBase64( byte[] binaryData, boolean isChunked )
    {
        int fullTriplets = binaryData.length / 3;
        int remainder = binaryData.length % 3;

        // Sizes are computed with long/integer arithmetic so that large inputs neither overflow nor suffer
        // from floating point rounding.
        long totalLength = 4L * ( fullTriplets + ( remainder == 0 ? 0 : 1 ) );
        int chunkCount = 0;
        if ( isChunked && CHUNK_SEPARATOR.length != 0 )
        {
            chunkCount = (int) ( ( totalLength + CHUNK_SIZE - 1 ) / CHUNK_SIZE );
            totalLength += (long) chunkCount * CHUNK_SEPARATOR.length;
        }
        if ( totalLength > Integer.MAX_VALUE )
        {
            throw new IllegalArgumentException( "Input of " + binaryData.length
                + " bytes is too large to be Base64-encoded into a single array" );
        }

        byte[] encoded = new byte[(int) totalLength];

        int src = 0;
        int dst = 0;
        int lineLength = 0;
        int chunksWritten = 0;

        for ( int t = 0; t < fullTriplets; t++ )
        {
            // Pack three octets into 24 bits, then emit them as four 6-bit values.
            int group = ( ( binaryData[src] & 0xff ) << 16 )
                | ( ( binaryData[src + 1] & 0xff ) << 8 )
                | ( binaryData[src + 2] & 0xff );
            src += 3;

            encoded[dst++] = lookUpBase64Alphabet[group >>> 18];
            encoded[dst++] = lookUpBase64Alphabet[( group >>> 12 ) & 0x3f];
            encoded[dst++] = lookUpBase64Alphabet[( group >>> 6 ) & 0x3f];
            encoded[dst++] = lookUpBase64Alphabet[group & 0x3f];
            lineLength += 4;

            // CHUNK_SIZE is a multiple of 4, so a line always ends exactly on a group boundary.
            if ( isChunked && lineLength == CHUNK_SIZE )
            {
                System.arraycopy( CHUNK_SEPARATOR, 0, encoded, dst, CHUNK_SEPARATOR.length );
                dst += CHUNK_SEPARATOR.length;
                lineLength = 0;
                chunksWritten++;
            }
        }

        if ( remainder == 1 )
        {
            // 8 trailing bits: two characters plus two pads
            int b1 = binaryData[src] & 0xff;
            encoded[dst] = lookUpBase64Alphabet[b1 >>> 2];
            encoded[dst + 1] = lookUpBase64Alphabet[( b1 & 0x03 ) << 4];
            encoded[dst + 2] = PAD;
            encoded[dst + 3] = PAD;
        }
        else if ( remainder == 2 )
        {
            // 16 trailing bits: three characters plus one pad
            int b1 = binaryData[src] & 0xff;
            int b2 = binaryData[src + 1] & 0xff;
            encoded[dst] = lookUpBase64Alphabet[b1 >>> 2];
            encoded[dst + 1] = lookUpBase64Alphabet[( ( b1 & 0x03 ) << 4 ) | ( b2 >>> 4 )];
            encoded[dst + 2] = lookUpBase64Alphabet[( b2 & 0x0f ) << 2];
            encoded[dst + 3] = PAD;
        }

        // we also add a separator to the end of the final (partial) chunk.
        if ( isChunked && chunksWritten < chunkCount )
        {
            System.arraycopy( CHUNK_SEPARATOR, 0, encoded, encoded.length - CHUNK_SEPARATOR.length,
                              CHUNK_SEPARATOR.length );
        }

        return encoded;
    }

    /**
     * Decodes Base64 data into octets.
     * <p>
     * Bytes outside the Base64 alphabet are ignored, as required by RFC 2045. A padding character ends the current
     * group: two pending characters yield one octet, three yield two octets, and a single dangling character is
     * dropped. The same rule is applied at the end of the input, so unpadded data is decoded as well. Decoding
     * continues after padding, and malformed input never causes an exception.
     * </p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data.
     * @throws NullPointerException if <code>base64Data</code> is <code>null</code>
     */
    public static byte[] decodeBase64( byte[] base64Data )
    {
        // Upper bound for the output: four input characters produce at most three octets.
        byte[] buffer = new byte[(int) ( (long) base64Data.length * 3 / 4 )];
        int written = 0;

        int bits = 0; // 6-bit values of the group currently being assembled
        int sextets = 0; // how many 6-bit values 'bits' holds (0 to 3)

        for ( byte octet : base64Data )
        {
            if ( octet == PAD )
            {
                written = flushPartialGroup( buffer, written, bits, sextets );
                bits = 0;
                sextets = 0;
            }
            else if ( octet >= 0 && base64Alphabet[octet] != -1 )
            {
                bits = ( bits << 6 ) | base64Alphabet[octet];
                if ( ++sextets == 4 )
                {
                    buffer[written++] = (byte) ( bits >> 16 );
                    buffer[written++] = (byte) ( bits >> 8 );
                    buffer[written++] = (byte) bits;
                    bits = 0;
                    sextets = 0;
                }
            }
            // anything else is outside the alphabet and is skipped
        }
        written = flushPartialGroup( buffer, written, bits, sextets );

        if ( written == buffer.length )
        {
            return buffer;
        }
        byte[] decoded = new byte[written];
        System.arraycopy( buffer, 0, decoded, 0, written );
        return decoded;
    }

    /**
     * Writes the octets fully determined by an incomplete group of 6-bit values.
     *
     * @param target buffer receiving the decoded octets
     * @param offset index in <code>target</code> of the next free position
     * @param bits the accumulated 6-bit values, most recent in the low bits
     * @param sextets number of 6-bit values accumulated in <code>bits</code>
     * @return the index of the next free position after writing
     */
    private static int flushPartialGroup( byte[] target, int offset, int bits, int sextets )
    {
        if ( sextets == 2 )
        {
            // 12 bits available: one full octet
            target[offset++] = (byte) ( bits >> 4 );
        }
        else if ( sextets == 3 )
        {
            // 18 bits available: two full octets
            target[offset++] = (byte) ( bits >> 10 );
            target[offset++] = (byte) ( bits >> 2 );
        }
        // zero or one 6-bit value does not make up an octet
        return offset;
    }

    /**
     * Discards any whitespace (space, tab, CR, LF) from a base-64 encoded block.
     *
     * @param data The base-64 encoded data to discard the whitespace from.
     * @return A new array holding the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace( byte[] data )
    {
        byte[] kept = new byte[data.length];
        int count = 0;

        for ( byte octet : data )
        {
            if ( !isWhitespace( octet ) )
            {
                kept[count++] = octet;
            }
        }

        byte[] packed = new byte[count];
        System.arraycopy( kept, 0, packed, 0, count );
        return packed;
    }

    /**
     * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
     * characters outside of the base64 alphabet are to be ignored in base64 encoded data." The padding character is
     * kept.
     *
     * @param data The base-64 encoded data to groom
     * @return A new array holding the data, less non-base64 characters (see RFC 2045).
     */
    static byte[] discardNonBase64( byte[] data )
    {
        byte[] kept = new byte[data.length];
        int count = 0;

        for ( byte octet : data )
        {
            if ( isBase64( octet ) )
            {
                kept[count++] = octet;
            }
        }

        byte[] packed = new byte[count];
        System.arraycopy( kept, 0, packed, 0, count );
        return packed;
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode( byte[] pArray )
    {
        return encodeBase64( pArray, false );
    }

}