View Javadoc
1   package org.apache.maven.surefire.util.internal;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.CharacterCodingException;
25  import java.nio.charset.Charset;
26  import java.util.StringTokenizer;
27  
/**
 * <p>
 * Common {@link String java.lang.String} manipulation routines.
 * </p>
 * <br>
 * <p>
 * Originally from <a href="http://jakarta.apache.org/turbine/">Turbine</a> and the GenerationJavaCore library.
 * </p>
 * <br>
 * NOTE: This class is not part of any api and is public purely for technical reasons !
 *
 * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
 * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
 * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
 * @author <a href="mailto:ed@codehaus.org">Ed Korthof</a>
 * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
 * @author Stephen Colebourne
 * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
 * @author Holger Krauth
 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 * @version $Id: StringUtils.java 8001 2009-01-03 13:17:09Z vsiveton $
 * @since 1.0
 */
public final class StringUtils
{
    /** Value of the {@code line.separator} system property, read once when this class is initialized. */
    public static final String NL = System.getProperty( "line.separator" );

    /** Upper-case hexadecimal digits as ASCII bytes, indexed by the value of the nibble (0-15). */
    private static final byte[] HEX_CHARS = {
                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

    /** Default charset of the JVM, captured once when this class is initialized. */
    private static final Charset DEFAULT_CHARSET = Charset.defaultCharset();

    /**
     * TODO
     * Use JDK7 StandardCharsets
     */
    public static final Charset US_ASCII = Charset.forName( "US-ASCII" );

    // 8-bit charset Latin-1
    public static final Charset ISO_8859_1 = Charset.forName( "ISO-8859-1" );

    public static final Charset UTF_8 = Charset.forName( "UTF-8" );

    private StringUtils()
    {
        throw new IllegalStateException( "no instantiable constructor" );
    }

    /**
     * Splits {@code text} into tokens using {@link StringTokenizer}.
     * <br>
     * Every character of {@code separator} is treated as an individual delimiter, and empty tokens are never
     * returned (consecutive delimiters collapse).
     *
     * @param text the text to split, must not be {@code null}
     * @param separator the delimiter characters; {@code null} selects the default delimiters of
     *                  {@link StringTokenizer} (whitespace)
     * @return the tokens in order of appearance, never {@code null}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String[] split( String text, String separator )
    {
        // Null separator means we're using StringTokenizer's default
        // delimiter, which comprises all whitespace characters.
        final StringTokenizer tok =
            separator == null ? new StringTokenizer( text ) : new StringTokenizer( text, separator );

        String[] list = new String[tok.countTokens()];
        for ( int i = 0; tok.hasMoreTokens(); i++ )
        {
            list[i] = tok.nextToken();
        }
        return list;
    }

    /**
     * <p>
     * Checks if a (trimmed) String is {@code null} or blank.
     * </p>
     *
     * @param str the String to check
     * @return {@code true} if the String is {@code null}, or length zero once trimmed
     */
    public static boolean isBlank( String str )
    {
        return str == null || str.trim().isEmpty();
    }

    /**
     * <p>
     * Checks if a (trimmed) String is not {@code null} and not blank.
     * </p>
     *
     * @param str the String to check
     * @return {@code true} if the String is not {@code null} and length of trimmed {@code str} is not zero.
     */
    public static boolean isNotBlank( String str )
    {
        return !isBlank( str );
    }

    /**
     * Escape the specified string to a representation that only consists of nicely printable characters, without any
     * newlines and without a comma.
     * <br>
     * Every character outside the range 32..126, as well as the backslash and the comma, is written as a backslash
     * followed by exactly four upper-case hexadecimal digits of its UTF-16 code unit. All other characters are
     * copied unchanged.
     * <p>
     * The reverse-method is {@link #unescapeString(StringBuilder, CharSequence)}.
     *
     * @param target target string buffer. The required space will be up to {@code str.length() * 5} chars.
     * @param str String to escape values in, may be {@code null} (in which case nothing is appended).
     * @throws IllegalArgumentException if {@code target} is {@code null}
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    public static void escapeToPrintable( StringBuilder target, CharSequence str )
    {
        if ( target == null )
        {
            throw new IllegalArgumentException( "The target buffer must not be null" );
        }
        if ( str == null )
        {
            return;
        }

        for ( int i = 0; i < str.length(); i++ )
        {
            char c = str.charAt( i );

            // handle non-nicely printable chars and the comma
            if ( c < 32 || c > 126 || c == '\\' || c == ',' )
            {
                // one hex digit per nibble, most significant nibble first
                target.append( '\\' );
                target.append( (char) HEX_CHARS[( 0xF000 & c ) >> 12] );
                target.append( (char) HEX_CHARS[( 0x0F00 & c ) >> 8] );
                target.append( (char) HEX_CHARS[( 0x00F0 & c ) >> 4] );
                target.append( (char) HEX_CHARS[( 0x000F & c )] );
            }
            else
            {
                target.append( c );
            }
        }
    }

    /**
     * Reverses the effect of {@link #escapeToPrintable(StringBuilder, CharSequence)}.
     * <br>
     * Each backslash must be followed by four hexadecimal digits. The digits are not validated, so input that was
     * not produced by {@link #escapeToPrintable(StringBuilder, CharSequence)} yields unspecified characters.
     *
     * @param target target string buffer
     * @param str the String to un-escape, as created by {@link #escapeToPrintable(StringBuilder, CharSequence)},
     *            may be {@code null} (in which case nothing is appended)
     * @throws IllegalArgumentException if {@code target} is {@code null}
     * @throws IndexOutOfBoundsException if {@code str} ends in the middle of an escape sequence
     */
    public static void unescapeString( StringBuilder target, CharSequence str )
    {
        if ( target == null )
        {
            throw new IllegalArgumentException( "The target buffer must not be null" );
        }
        if ( str == null )
        {
            return;
        }

        for ( int i = 0; i < str.length(); i++ )
        {
            char ch = str.charAt( i );

            if ( ch == '\\' )
            {
                // consume the four hex digits after the backslash, most significant nibble first
                target.append( (char) (
                                  digit( str.charAt( ++i ) ) << 12
                                | digit( str.charAt( ++i ) ) << 8
                                | digit( str.charAt( ++i ) ) << 4
                                | digit( str.charAt( ++i ) )
                                ) );
            }
            else
            {
                target.append( ch );
            }
        }
    }

    /**
     * Converts one hexadecimal digit ({@code 0-9}, {@code A-F} or {@code a-f}) to its numeric value.
     * <br>
     * The character is not validated; any other character produces a meaningless value.
     *
     * @param ch hexadecimal digit
     * @return value of the digit
     */
    private static int digit( char ch )
    {
        if ( ch >= 'a' )
        {
            return 10 + ch - 'a';
        }
        else if ( ch >= 'A' )
        {
            return 10 + ch - 'A';
        }
        else
        {
            return ch - '0';
        }
    }

    /**
     * Escapes the bytes in the array {@code input} to contain only 'printable' bytes.
     * <br>
     * Escaping is done by encoding the non-nicely printable bytes to {@code '\' + upperCaseHexBytes(byte)}.
     * Bytes outside the range 32..126, the backslash and the comma are escaped.
     * <br>
     * The reverse-method is {@link #unescapeBytes(String, String)}.
     * <br>
     * The returned byte array is started with aligned sequence {@code header} and finished by {@code \n}.
     *
     * @param header prefix header
     * @param input input buffer
     * @param off offset in the input buffer
     * @param len number of bytes to copy from the input buffer
     * @return the encoded bytes; only the first {@link EncodedArray#getSize()} bytes of
     *         {@link EncodedArray#getArray()} are valid
     * @throws NullPointerException if the specified parameter {@code header} or {@code input} is null
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is out of range
     *         ({@code off < 0 || len < 0 || off >= input.length || off + len > input.length}); note that an
     *         empty {@code input} is therefore always rejected
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    public static EncodedArray escapeBytesToPrintable( final byte[] header, final byte[] input, final int off,
                                                       final int len )
    {
        // "len > input.length - off" is the overflow-safe form of "off + len > input.length";
        // it is only evaluated once off is known to be within [0, input.length).
        if ( off < 0 || len < 0 || off >= input.length || len > input.length - off )
        {
            throw new IndexOutOfBoundsException(
                    "off < 0 || len < 0 || off >= input.length || off + len > input.length" );
        }
        // Hex-escaping can be up to 3 times length of a regular byte. Last character is '\n', see (+1).
        final byte[] encodeBytes = new byte[header.length + 3 * len + 1];
        System.arraycopy( header, 0, encodeBytes, 0, header.length );
        int outputPos = header.length;
        final int end = off + len;
        for ( int i = off; i < end; i++ )
        {
            final byte b = input[i];

            // handle non-nicely printable bytes (negative bytes, i.e. 0x80-0xFF, match "b < 32")
            if ( b < 32 || b > 126 || b == '\\' || b == ',' )
            {
                final int upper = ( 0xF0 & b ) >> 4;
                final int lower = ( 0x0F & b );
                encodeBytes[outputPos++] = '\\';
                encodeBytes[outputPos++] = HEX_CHARS[upper];
                encodeBytes[outputPos++] = HEX_CHARS[lower];
            }
            else
            {
                encodeBytes[outputPos++] = b;
            }
        }
        encodeBytes[outputPos++] = (byte) '\n';

        return new EncodedArray( encodeBytes, outputPos );
    }

    /**
     * Reverses the effect of {@link #escapeBytesToPrintable(byte[], byte[], int, int)}.
     * <br>
     * Each backslash must be followed by two hexadecimal digits (not validated); every other character is narrowed
     * to a single byte. If the charset named by {@code charsetName} differs from the default charset of the JVM,
     * the unescaped bytes are decoded with the named charset and re-encoded with the default charset. If decoding
     * fails, the unescaped bytes are returned without recoding.
     *
     * @param str the input String, may be {@code null}
     * @param charsetName the name of the charset in which the unescaped bytes are encoded
     * @return buffer with the unescaped (and possibly recoded) bytes between its position and limit; an empty
     *         buffer if {@code str} is {@code null}
     * @throws IndexOutOfBoundsException if {@code str} ends in the middle of an escape sequence
     * @throws IllegalArgumentException if {@code str} is not {@code null} and {@code charsetName} is not accepted
     *         by {@link Charset#forName(String)}
     */
    public static ByteBuffer unescapeBytes( String str, String charsetName  )
    {
        int outPos = 0;

        if ( str == null )
        {
            return ByteBuffer.wrap( new byte[0] );
        }

        // the unescaped form is never longer than the escaped form
        byte[] out = new byte[str.length()];
        for ( int i = 0; i < str.length(); i++ )
        {
            char ch = str.charAt( i );

            if ( ch == '\\' )
            {
                int upper = digit( str.charAt( ++i ) );
                int lower = digit( str.charAt( ++i ) );
                out[outPos++] = (byte) ( upper << 4 | lower );
            }
            else
            {
                out[outPos++] = (byte) ch;
            }
        }

        Charset sourceCharset = Charset.forName( charsetName );
        if ( !DEFAULT_CHARSET.equals( sourceCharset ) )
        {
            CharBuffer decodedFromSourceCharset;
            try
            {
                decodedFromSourceCharset = sourceCharset.newDecoder().decode( ByteBuffer.wrap( out, 0, outPos ) );
                return DEFAULT_CHARSET.encode( decodedFromSourceCharset );
            }
            catch ( CharacterCodingException e )
            {
                // ignore and fall through to the non-recoded version
            }
        }

        return ByteBuffer.wrap( out, 0, outPos );
    }

    /**
     * Encodes {@code string} with the {@link #ISO_8859_1} charset.
     *
     * @param string the string to encode, must not be {@code null}
     * @return the ISO-8859-1 encoded bytes of {@code string}
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public static byte[] encodeStringForForkCommunication( String string )
    {
        return string.getBytes( ISO_8859_1 );
    }

    /**
     * Determines if {@code buffer} starts with specific literal(s).
     *
     * @param buffer     Examined StringBuffer
     * @param pattern    a pattern which should start in {@code buffer}
     * @return    {@code true} if buffer's literal starts with given {@code pattern}, or both are empty.
     * @throws NullPointerException if {@code buffer} or {@code pattern} is {@code null}
     */
    public static boolean startsWith( StringBuffer buffer, String pattern )
    {
        if ( buffer.length() < pattern.length() )
        {
            return false;
        }
        else
        {
            for ( int i = 0, len = pattern.length(); i < len; i++ )
            {
                if ( buffer.charAt( i ) != pattern.charAt( i ) )
                {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Escaped string to byte array with offset 0 and certain length.
     * <br>
     * The backing array may be longer than {@link #getSize()}; only the first {@link #getSize()} bytes are valid.
     */
    public static final class EncodedArray
    {
        private final byte[] array;
        private final int size;

        private EncodedArray( byte[] array, int size )
        {
            this.array = array;
            this.size = size;
        }

        /**
         * @return the backing array itself (not a copy)
         */
        public byte[] getArray()
        {
            return array;
        }

        /**
         * @return number of valid bytes at the beginning of {@link #getArray()}
         */
        public int getSize()
        {
            return size;
        }
    }
}