package org.codehaus.plexus.util;

/*
 * Copyright The Codehaus Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Provides Base64 encoding and decoding as defined by RFC 2045.
 * <p>
 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
 * </p>
 * <p>
 * All methods operate on byte arrays. Passing <code>null</code> to any of them results in a
 * <code>NullPointerException</code>.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @author Apache Software Foundation
 * @since 1.0-dev
 *
 */
public class Base64
{

    //
    // Source Id: Base64.java 161350 2005-04-14 20:39:46Z ggregory
    //

    /**
     * Chunk size per RFC 2045 section 6.8.
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     * <p>
     * Spelled out as explicit bytes so that the value does not depend on the platform default charset.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = { (byte) '\r', (byte) '\n' };

    /**
     * The base length: number of slots in the decoding table {@link #base64Alphabet}.
     */
    static final int BASELENGTH = 255;

    /**
     * Lookup length: number of characters in the Base64 alphabet.
     */
    static final int LOOKUPLENGTH = 64;

    /**
     * The number of bits in a byte. Not referenced by this class any more; kept because it is package-visible.
     */
    static final int EIGHTBIT = 8;

    /**
     * The number of bits in two bytes. Not referenced by this class any more; kept because it is package-visible.
     */
    static final int SIXTEENBIT = 16;

    /**
     * The number of bits in one group of three input bytes. Not referenced by this class any more; kept because it
     * is package-visible.
     */
    static final int TWENTYFOURBITGROUP = 24;

    /**
     * The number of Base64 characters in one encoded group. Not referenced by this class any more; kept because it
     * is package-visible.
     */
    static final int FOURBYTE = 4;

    /**
     * Mask for the sign bit of a byte. Not referenced by this class any more; kept because it is package-visible.
     */
    static final int SIGN = -128;

    /**
     * Byte used to pad output.
     */
    static final byte PAD = (byte) '=';

    /**
     * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
     * indices.
     * <p>
     * For example, <code>base64Alphabet['+']</code> returns <code>62</code>.
     * </p>
     * <p>
     * The value of undefined encodings is <code>-1</code>.
     * </p>
     */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /**
     * <p>
     * Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
     * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
     * <code>/</code>.
     * </p>
     * <p>
     * This array is accessed by using 6-bit values as indices.
     * </p>
     * <p>
     * For example, <code>lookUpBase64Alphabet[62]</code> returns <code>'+'</code>.
     * </p>
     */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populating the lookup and character arrays
    static
    {
        // Every slot starts out as "undefined"; only the 64 alphabet characters are overwritten below.
        for ( int i = 0; i < BASELENGTH; i++ )
        {
            base64Alphabet[i] = (byte) -1;
        }

        for ( int i = 0; i < 26; i++ )
        {
            lookUpBase64Alphabet[i] = (byte) ( 'A' + i );
            lookUpBase64Alphabet[26 + i] = (byte) ( 'a' + i );
        }
        for ( int i = 0; i < 10; i++ )
        {
            lookUpBase64Alphabet[52 + i] = (byte) ( '0' + i );
        }
        lookUpBase64Alphabet[62] = (byte) '+';
        lookUpBase64Alphabet[63] = (byte) '/';

        // The decoding table is the exact inverse of the encoding table.
        for ( int value = 0; value < LOOKUPLENGTH; value++ )
        {
            base64Alphabet[lookUpBase64Alphabet[value]] = (byte) value;
        }
    }

    /**
     * Returns whether or not the <code>octect</code> is in the base 64 alphabet. The padding character
     * <code>'='</code> counts as a member.
     *
     * @param octect The value to test
     * @return <code>true</code> if the value is defined in the base 64 alphabet, <code>false</code> otherwise.
     */
    private static boolean isBase64( byte octect )
    {
        // Negative bytes can never be Base64 characters and must not be used as an array index.
        return octect == PAD || ( octect >= 0 && base64Alphabet[octect] != -1 );
    }

    /**
     * Copies the first <code>length</code> bytes of <code>source</code> into a new array of exactly that size.
     *
     * @param source array to copy from
     * @param length number of leading bytes to keep
     * @return a new array holding the first <code>length</code> bytes of <code>source</code>
     */
    private static byte[] pack( byte[] source, int length )
    {
        byte[] packed = new byte[length];
        System.arraycopy( source, 0, packed, 0, length );
        return packed;
    }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Whitespace
     * (space, tab, CR, LF) is ignored by the test.
     *
     * @param arrayOctect byte array to test
     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
     *         false, otherwise
     */
    public static boolean isArrayByteBase64( byte[] arrayOctect )
    {
        // An empty array (or one made only of whitespace) is considered valid: the loop simply never runs.
        for ( byte candidate : discardWhitespace( arrayOctect ) )
        {
            if ( !isBase64( candidate ) )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64( byte[] binaryData )
    {
        return encodeBase64( binaryData, false );
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked( byte[] binaryData )
    {
        return encodeBase64( binaryData, true );
    }

    /**
     * Decodes a byte[] containing characters in the Base64 alphabet.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode( byte[] pArray )
    {
        return decodeBase64( pArray );
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * When chunking, every line of output, including the last one, is terminated by {@link #CHUNK_SEPARATOR}.
     * Empty input yields empty output in both modes.
     * </p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException if the encoded output would not fit into a single array
     */
    public static byte[] encodeBase64( byte[] binaryData, boolean isChunked )
    {
        // Work in bytes rather than bits: length * 8 would overflow an int for inputs larger than 256 MiB.
        int fullTriplets = binaryData.length / 3;
        int remainder = binaryData.length % 3;

        // Every started group of three input bytes produces four output characters.
        long encodedLength = 4L * ( fullTriplets + ( remainder == 0 ? 0 : 1 ) );

        // If the output is to be "chunked" into 76 character sections, for compliance with RFC 2045 MIME,
        // then allow for the extra length of one separator per (possibly partial) line.
        int separatorCount = 0;
        if ( isChunked && CHUNK_SEPARATOR.length > 0 )
        {
            // Integer ceiling division; floating point would lose precision for large lengths.
            separatorCount = (int) ( ( encodedLength + CHUNK_SIZE - 1 ) / CHUNK_SIZE );
            encodedLength += (long) separatorCount * CHUNK_SEPARATOR.length;
        }

        if ( encodedLength > Integer.MAX_VALUE )
        {
            throw new IllegalArgumentException( "Input array too big, the output array would be bigger ("
                + encodedLength + ") than the maximum array size of " + Integer.MAX_VALUE );
        }

        byte[] encodedData = new byte[(int) encodedLength];

        int in = 0;
        int out = 0;
        int lineLength = 0;
        int separatorsWritten = 0;

        for ( int triplet = 0; triplet < fullTriplets; triplet++ )
        {
            // Pack three unsigned bytes into 24 bits, then slice them into four 6-bit indices.
            int bits = ( ( binaryData[in] & 0xff ) << 16 )
                | ( ( binaryData[in + 1] & 0xff ) << 8 )
                | ( binaryData[in + 2] & 0xff );
            in += 3;

            encodedData[out++] = lookUpBase64Alphabet[bits >>> 18];
            encodedData[out++] = lookUpBase64Alphabet[( bits >>> 12 ) & 0x3f];
            encodedData[out++] = lookUpBase64Alphabet[( bits >>> 6 ) & 0x3f];
            encodedData[out++] = lookUpBase64Alphabet[bits & 0x3f];
            lineLength += 4;

            // If we are chunking, put a separator down after each full line.
            // This relies on CHUNK_SIZE being a multiple of 4.
            if ( isChunked && lineLength == CHUNK_SIZE )
            {
                System.arraycopy( CHUNK_SEPARATOR, 0, encodedData, out, CHUNK_SEPARATOR.length );
                out += CHUNK_SEPARATOR.length;
                separatorsWritten++;
                lineLength = 0;
            }
        }

        // Encode the one or two left-over bytes and pad the group to four characters.
        if ( remainder == 1 )
        {
            int b1 = binaryData[in] & 0xff;

            encodedData[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[out++] = lookUpBase64Alphabet[( b1 & 0x03 ) << 4];
            encodedData[out++] = PAD;
            encodedData[out++] = PAD;
        }
        else if ( remainder == 2 )
        {
            int b1 = binaryData[in] & 0xff;
            int b2 = binaryData[in + 1] & 0xff;

            encodedData[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[out++] = lookUpBase64Alphabet[( ( b1 & 0x03 ) << 4 ) | ( b2 >>> 4 )];
            encodedData[out++] = lookUpBase64Alphabet[( b2 & 0x0f ) << 2];
            encodedData[out++] = PAD;
        }

        // We also add a separator to the end of the final (partial) line.
        if ( isChunked && separatorsWritten < separatorCount )
        {
            System.arraycopy( CHUNK_SEPARATOR, 0, encodedData, out, CHUNK_SEPARATOR.length );
        }

        return encodedData;
    }

    /**
     * Decodes Base64 data into octets.
     * <p>
     * Bytes outside of the Base64 alphabet (line breaks, whitespace, anything else) are ignored, as RFC 2045
     * requires. The decoder is lenient about padding: a <code>'='</code> ends the current group and discards its
     * left-over bits, so missing padding, surplus padding and concatenated encodings are all decoded without an
     * exception and without filler bytes in the result.
     * </p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data; empty if the input holds no decodable data.
     */
    public static byte[] decodeBase64( byte[] base64Data )
    {
        // RFC 2045 requires that we discard ALL non-Base64 characters
        base64Data = discardNonBase64( base64Data );

        // Only real alphabet characters carry data; each contributes 6 bits.
        int symbolCount = 0;
        for ( byte symbol : base64Data )
        {
            if ( symbol != PAD )
            {
                symbolCount++;
            }
        }

        // Upper bound on the output size: floor( 6 * symbolCount / 8 ). Long math avoids int overflow.
        byte[] decodedData = new byte[(int) ( symbolCount * 3L / 4 )];

        int written = 0;
        int accumulator = 0;
        int pendingBits = 0;

        for ( byte symbol : base64Data )
        {
            if ( symbol == PAD )
            {
                // Padding terminates the current group; bits that do not fill a whole byte are dropped.
                accumulator = 0;
                pendingBits = 0;
                continue;
            }

            // After discardNonBase64() every non-PAD byte maps to a value in 0..63.
            accumulator = ( accumulator << 6 ) | base64Alphabet[symbol];
            pendingBits += 6;

            if ( pendingBits >= 8 )
            {
                pendingBits -= 8;
                decodedData[written++] = (byte) ( accumulator >> pendingBits );
                // Keep only the bits that have not been emitted yet.
                accumulator &= ( 1 << pendingBits ) - 1;
            }
        }

        return written == decodedData.length ? decodedData : pack( decodedData, written );
    }

    /**
     * Discards any whitespace (space, tab, CR, LF) from a base-64 encoded block.
     *
     * @param data The base-64 encoded data to discard the whitespace from.
     * @return A new array with the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace( byte[] data )
    {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for ( byte aData : data )
        {
            switch ( aData )
            {
                case (byte) ' ':
                case (byte) '\n':
                case (byte) '\r':
                case (byte) '\t':
                    break;
                default:
                    groomedData[bytesCopied++] = aData;
            }
        }

        return pack( groomedData, bytesCopied );
    }

    /**
     * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
     * characters outside of the base64 alphabet are to be ignored in base64 encoded data." The padding character
     * <code>'='</code> is kept.
     *
     * @param data The base-64 encoded data to groom
     * @return A new array with the data, less non-base64 characters (see RFC 2045).
     */
    static byte[] discardNonBase64( byte[] data )
    {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for ( byte aData : data )
        {
            if ( isBase64( aData ) )
            {
                groomedData[bytesCopied++] = aData;
            }
        }

        return pack( groomedData, bytesCopied );
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet. The
     * output is not chunked.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode( byte[] pArray )
    {
        return encodeBase64( pArray, false );
    }

}