View Javadoc

1   package org.apache.maven.surefire.util.internal;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.PrintStream;
24  import java.io.StringWriter;
25  import java.io.Writer;
26  import java.util.StringTokenizer;
27  import org.apache.maven.surefire.util.NestedRuntimeException;
28  
29  /**
30   * <p>Common <code>String</code> manipulation routines.</p>
31   * <p/>
32   * <p>Originally from
33   * <a href="http://jakarta.apache.org/turbine/">Turbine</a> and the
34   * GenerationJavaCore library.</p>
35   *
36   * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
37   * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
38   * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
39   * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
40   * @author <a href="mailto:ed@codehaus.org">Ed Korthof</a>
41   * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
42   * @author Stephen Colebourne
43   * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
44   * @author Holger Krauth
45   * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
46   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
47   * @version $Id: StringUtils.java 8001 2009-01-03 13:17:09Z vsiveton $
48   * @noinspection JavaDoc
49   * <p/>
50   * A quick borrow from plexus-utils by Kristian Rosenvold, to restore jdk1.3 compat
51   * Threw away all the unused stuff.
52   * <p/>
53   * NOTE: This class is not part of any api and is public purely for technical reasons !
54   * @since 1.0
55   */
56  public class StringUtils
57  {
58  
59      // Splitting
60      //--------------------------------------------------------------------------
61  
62      public static String[] split( String text, String separator )
63      {
64          int max = -1;
65          StringTokenizer tok;
66          if ( separator == null )
67          {
68              // Null separator means we're using StringTokenizer's default
69              // delimiter, which comprises all whitespace characters.
70              tok = new StringTokenizer( text );
71          }
72          else
73          {
74              tok = new StringTokenizer( text, separator );
75          }
76  
77          int listSize = tok.countTokens();
78          if ( ( max > 0 ) && ( listSize > max ) )
79          {
80              listSize = max;
81          }
82  
83          String[] list = new String[listSize];
84          int i = 0;
85          int lastTokenBegin;
86          int lastTokenEnd = 0;
87          while ( tok.hasMoreTokens() )
88          {
89              if ( ( max > 0 ) && ( i == listSize - 1 ) )
90              {
91                  // In the situation where we hit the max yet have
92                  // tokens left over in our input, the last list
93                  // element gets all remaining text.
94                  String endToken = tok.nextToken();
95                  lastTokenBegin = text.indexOf( endToken, lastTokenEnd );
96                  list[i] = text.substring( lastTokenBegin );
97                  break;
98              }
99              else
100             {
101                 list[i] = tok.nextToken();
102                 lastTokenBegin = text.indexOf( list[i], lastTokenEnd );
103                 lastTokenEnd = lastTokenBegin + list[i].length();
104             }
105             i++;
106         }
107         return list;
108     }
109 
110 
111     /**
112      * <p>Checks if a (trimmed) String is <code>null</code> or blank.</p>
113      *
114      * @param str the String to check
115      * @return <code>true</code> if the String is <code>null</code>, or
116      *         length zero once trimmed
117      */
118     public static boolean isBlank( String str )
119     {
120         return ( ( str == null ) || ( str.trim().length() == 0 ) );
121     }
122 
123 
124     // Ripped from commons-lang StringEscapeUtils. Maybe Use dependency instead
125     public static void unescapeJava( StringWriter out, String str )
126     {
127         if ( out == null )
128         {
129             throw new IllegalArgumentException( "The Writer must not be null" );
130         }
131         if ( str == null )
132         {
133             return;
134         }
135         int sz = str.length();
136         StringBuffer unicode = new StringBuffer( 4 );
137         boolean hadSlash = false;
138         boolean inUnicode = false;
139         for ( int i = 0; i < sz; i++ )
140         {
141             char ch = str.charAt( i );
142             if ( inUnicode )
143             {
144                 // if in unicode, then we're reading unicode
145                 // values in somehow
146                 unicode.append( ch );
147                 if ( unicode.length() == 4 )
148                 {
149                     // unicode now contains the four hex digits
150                     // which represents our unicode character
151                     try
152                     {
153                         int value = Integer.parseInt( unicode.toString(), 16 );
154                         out.write( (char) value );
155                         unicode.setLength( 0 );
156                         inUnicode = false;
157                         hadSlash = false;
158                     }
159                     catch ( NumberFormatException nfe )
160                     {
161                         throw new NestedRuntimeException( "Unable to parse unicode value: " + unicode, nfe );
162                     }
163                 }
164                 continue;
165             }
166             if ( hadSlash )
167             {
168                 // handle an escaped value
169                 hadSlash = false;
170                 switch ( ch )
171                 {
172                     case '\\':
173                         out.write( '\\' );
174                         break;
175                     case '\'':
176                         out.write( '\'' );
177                         break;
178                     case '\"':
179                         out.write( '"' );
180                         break;
181                     case 'r':
182                         out.write( '\r' );
183                         break;
184                     case 'f':
185                         out.write( '\f' );
186                         break;
187                     case 't':
188                         out.write( '\t' );
189                         break;
190                     case 'n':
191                         out.write( '\n' );
192                         break;
193                     case 'b':
194                         out.write( '\b' );
195                         break;
196                     case 'u':
197                     {
198                         // uh-oh, we're in unicode country....
199                         inUnicode = true;
200                         break;
201                     }
202                     default:
203                         out.write( ch );
204                         break;
205                 }
206                 continue;
207             }
208             else if ( ch == '\\' )
209             {
210                 hadSlash = true;
211                 continue;
212             }
213             out.write( ch );
214         }
215         if ( hadSlash )
216         {
217             // then we're in the weird case of a \ at the end of the
218             // string, let's output it anyway.
219             out.write( '\\' );
220         }
221     }
222 
223     // Ripped from commons-lang StringEscapeUtils. Maybe Use dependency instead
224     public static int unescapeJava( byte[] out, String str )
225     {
226         int outPos = 0;
227         if ( out == null )
228         {
229             throw new IllegalArgumentException( "The Writer must not be null" );
230         }
231         if ( str == null )
232         {
233             return 0;
234         }
235         int sz = str.length();
236         StringBuffer unicode = new StringBuffer( 4 );
237         boolean hadSlash = false;
238         boolean inUnicode = false;
239         for ( int i = 0; i < sz; i++ )
240         {
241             char ch = str.charAt( i );
242             if ( inUnicode )
243             {
244                 // if in unicode, then we're reading unicode
245                 // values in somehow
246                 unicode.append( ch );
247                 if ( unicode.length() == 4 )
248                 {
249                     // unicode now contains the four hex digits
250                     // which represents our unicode character
251                     try
252                     {
253                         int value = Integer.parseInt( unicode.toString(), 16 );
254                         out[outPos++] = (byte) value;
255                         unicode.setLength( 0 );
256                         inUnicode = false;
257                         hadSlash = false;
258                     }
259                     catch ( NumberFormatException nfe )
260                     {
261                         throw new NestedRuntimeException( "Unable to parse unicode value: " + unicode, nfe );
262                     }
263                 }
264                 continue;
265             }
266             if ( hadSlash )
267             {
268                 // handle an escaped value
269                 hadSlash = false;
270                 switch ( ch )
271                 {
272                     case '\\':
273                         out[outPos++] = '\\';
274                         break;
275                     case '\'':
276                         out[outPos++] = '\'';
277                         break;
278                     case '\"':
279                         out[outPos++] = '"';
280                         break;
281                     case 'r':
282                         out[outPos++] = '\r';
283                         break;
284                     case 'f':
285                         out[outPos++] = '\f';
286                         break;
287                     case 't':
288                         out[outPos++] = '\t';
289                         break;
290                     case 'n':
291                         out[outPos++] = '\n';
292                         break;
293                     case 'b':
294                         out[outPos++] = '\b';
295                         break;
296                     case 'u':
297                     {
298                         // uh-oh, we're in unicode country....
299                         inUnicode = true;
300                         break;
301                     }
302                     default:
303                         out[outPos++] = (byte) ch;
304                         break;
305                 }
306                 continue;
307             }
308             else if ( ch == '\\' )
309             {
310                 hadSlash = true;
311                 continue;
312             }
313             out[outPos++] = (byte) ch;
314         }
315         if ( hadSlash )
316         {
317             // then we're in the weird case of a \ at the end of the
318             // string, let's output it anyway.
319             out[outPos++] = '\\';
320         }
321         return outPos;
322     }
323 
324     // Ripped from commons-lang StringEscapeUtils. With a minor modification, we unicode-quote commas
325     // to avoid csv decoding problems ;)
326 
327     /**
328      * @param out               write to receieve the escaped string
329      * @param str               String to escape values in, may be null
330      * @param escapeSingleQuote escapes single quotes if <code>true</code>
331      * @throws java.io.IOException if an IOException occurs
332      */
333     public static void escapeJavaStyleString( Writer out, String str, boolean escapeSingleQuote )
334         throws IOException
335     {
336         if ( out == null )
337         {
338             throw new IllegalArgumentException( "The Writer must not be null" );
339         }
340         if ( str == null )
341         {
342             return;
343         }
344         int sz;
345         sz = str.length();
346         for ( int i = 0; i < sz; i++ )
347         {
348             char ch = str.charAt( i );
349 
350             // handle unicode
351             if ( ch > 0xfff )
352             {
353                 out.write( "\\u" + hex( ch ) );
354             }
355             else if ( ch > 0xff )
356             {
357                 out.write( "\\u0" + hex( ch ) );
358             }
359             else if ( ch > 0x7f || ch == ',' )
360             {    // Kr - this line modified from commons
361                 out.write( "\\u00" + hex( ch ) );
362             }
363             else if ( ch < 32 )
364             {
365                 switch ( ch )
366                 {
367                     case '\b':
368                         out.write( '\\' );
369                         out.write( 'b' );
370                         break;
371                     case '\n':
372                         out.write( '\\' );
373                         out.write( 'n' );
374                         break;
375                     case '\t':
376                         out.write( '\\' );
377                         out.write( 't' );
378                         break;
379                     case '\f':
380                         out.write( '\\' );
381                         out.write( 'f' );
382                         break;
383                     case '\r':
384                         out.write( '\\' );
385                         out.write( 'r' );
386                         break;
387                     default:
388                         if ( ch > 0xf )
389                         {
390                             out.write( "\\u00" + hex( ch ) );
391                         }
392                         else
393                         {
394                             out.write( "\\u000" + hex( ch ) );
395                         }
396                         break;
397                 }
398             }
399             else
400             {
401                 switch ( ch )
402                 {
403                     case '\'':
404                         if ( escapeSingleQuote )
405                         {
406                             out.write( '\\' );
407                         }
408                         out.write( '\'' );
409                         break;
410                     case '"':
411                         out.write( '\\' );
412                         out.write( '"' );
413                         break;
414                     case '\\':
415                         out.write( '\\' );
416                         out.write( '\\' );
417                         break;
418                     case '/':
419                         out.write( '\\' );
420                         out.write( '/' );
421                         break;
422                     default:
423                         out.write( ch );
424                         break;
425                 }
426             }
427         }
428     }
429 
430     public static void escapeJavaStyleString( ByteBuffer out, byte[] str, int off, int len )
431     {
432         if ( out == null )
433         {
434             throw new IllegalArgumentException( "The Writer must not be null" );
435         }
436         final int inputLength = str.length;
437         if ( str == null || inputLength == 0 )
438         {
439             return;
440         }
441         int outputPos = 0;
442         int end = off + len;
443         for ( int i = off; i < end; i++ )
444         {
445             char ch = (char) str[i];
446 
447             // handle unicode
448             if ( ch > 0xfff )
449             {
450                 outputPos = writeOut( out, outputPos, "\\u" + hex( ch ) );
451             }
452             else if ( ch > 0xff )
453             {
454                 outputPos = writeOut( out, outputPos, "\\u0" + hex( ch ) );
455             }
456             else if ( ch > 0x7f || ch == ',' )
457             {    // Kr - this line modified from commons
458                 outputPos = writeOut( out, outputPos, "\\u00" + hex( ch ) );
459             }
460             else if ( ch < 32 )
461             {
462                 switch ( ch )
463                 {
464                     case '\b':
465                         out.append( '\\' );
466                         out.append( 'b' );
467                         break;
468                     case '\n':
469                         out.append( '\\' );
470                         out.append( 'n' );
471                         break;
472                     case '\t':
473                         out.append( '\\' );
474                         out.append( 't' );
475                         break;
476                     case '\f':
477                         out.append( '\\' );
478                         out.append( 'f' );
479                         break;
480                     case '\r':
481                         out.append( '\\' );
482                         out.append( 'r' );
483                         break;
484                     default:
485                         if ( ch > 0xf )
486                         {
487                             outputPos = writeOut( out, outputPos, "\\u00" + hex( ch ) );
488                         }
489                         else
490                         {
491                             outputPos = writeOut( out, outputPos, "\\u000" + hex( ch ) );
492                         }
493                         break;
494                 }
495             }
496             else
497             {
498                 switch ( ch )
499                 {
500                     case '\'':
501                         out.append( '\\' );
502                         out.append( '\'' );
503                         break;
504                     case '"':
505                         out.append( '\\' );
506                         out.append( '"' );
507                         break;
508                     case '\\':
509                         out.append( '\\' );
510                         out.append( '\\' );
511                         break;
512                     case '/':
513                         out.append( '\\' );
514                         out.append( '/' );
515                         break;
516                     default:
517                         out.append( ch );
518                         break;
519                 }
520             }
521         }
522     }
523 
524     public static void escapeJavaStyleString( PrintStream out, byte[] str, int off, int len )
525     {
526         if ( out == null )
527         {
528             throw new IllegalArgumentException( "The Writer must not be null" );
529         }
530         final int inputLength = str.length;
531         if ( str == null || inputLength == 0 )
532         {
533             return;
534         }
535         int outputPos = 0;
536         int end = off + len;
537         for ( int i = off; i < end; i++ )
538         {
539             char ch = (char) str[i];
540 
541             // handle unicode
542             if ( ch > 0xfff )
543             {
544                 outputPos = writeOut( out, outputPos, "\\u" + hex( ch ) );
545             }
546             else if ( ch > 0xff )
547             {
548                 outputPos = writeOut( out, outputPos, "\\u0" + hex( ch ) );
549             }
550             else if ( ch > 0x7f || ch == ',' )
551             {    // Kr - this line modified from commons
552                 outputPos = writeOut( out, outputPos, "\\u00" + hex( ch ) );
553             }
554             else if ( ch < 32 )
555             {
556                 switch ( ch )
557                 {
558                     case '\b':
559                         out.append( '\\' );
560                         out.append( 'b' );
561                         break;
562                     case '\n':
563                         out.append( '\\' );
564                         out.append( 'n' );
565                         break;
566                     case '\t':
567                         out.append( '\\' );
568                         out.append( 't' );
569                         break;
570                     case '\f':
571                         out.append( '\\' );
572                         out.append( 'f' );
573                         break;
574                     case '\r':
575                         out.append( '\\' );
576                         out.append( 'r' );
577                         break;
578                     default:
579                         if ( ch > 0xf )
580                         {
581                             outputPos = writeOut( out, outputPos, "\\u00" + hex( ch ) );
582                         }
583                         else
584                         {
585                             outputPos = writeOut( out, outputPos, "\\u000" + hex( ch ) );
586                         }
587                         break;
588                 }
589             }
590             else
591             {
592                 switch ( ch )
593                 {
594                     case '\'':
595                         out.append( '\\' );
596                         out.append( '\'' );
597                         break;
598                     case '"':
599                         out.append( '\\' );
600                         out.append( '"' );
601                         break;
602                     case '\\':
603                         out.append( '\\' );
604                         out.append( '\\' );
605                         break;
606                     case '/':
607                         out.append( '\\' );
608                         out.append( '/' );
609                         break;
610                     default:
611                         out.append( ch );
612                         break;
613                 }
614             }
615         }
616     }
617 
618     public static int escapeJavaStyleString( byte[] out, int outoff, byte[] str, int off, int len )
619     {
620         if ( out == null )
621         {
622             throw new IllegalArgumentException( "The Writer must not be null" );
623         }
624         final int inputLength = str.length;
625         if ( str == null || inputLength == 0 )
626         {
627             return 0;
628         }
629         int outputPos = outoff;
630         int end = off + len;
631         for ( int i = off; i < end; i++ )
632         {
633             char ch = (char) str[i];
634 
635             // handle unicode
636             if ( ch > 0xfff )
637             {
638                 outputPos = writeOut( out, outputPos, "\\u" + hex( ch ) );
639             }
640             else if ( ch > 0xff )
641             {
642                 outputPos = writeOut( out, outputPos, "\\u0" + hex( ch ) );
643             }
644             else if ( ch > 0x7f || ch == ',' )
645             {    // Kr - this line modified from commons
646                 outputPos = writeOut( out, outputPos, "\\u00" + hex( ch ) );
647             }
648             else if ( ch < 32 )
649             {
650                 switch ( ch )
651                 {
652                     case '\b':
653                         out[outputPos++] = '\\';
654                         out[outputPos++] = 'b';
655                         break;
656                     case '\n':
657                         out[outputPos++] = '\\';
658                         out[outputPos++] = 'n';
659                         break;
660                     case '\t':
661                         out[outputPos++] = '\\';
662                         out[outputPos++] = 't';
663                         break;
664                     case '\f':
665                         out[outputPos++] = '\\';
666                         out[outputPos++] = 'f';
667                         break;
668                     case '\r':
669                         out[outputPos++] = '\\';
670                         out[outputPos++] = 'r';
671                         break;
672                     default:
673                         if ( ch > 0xf )
674                         {
675                             outputPos = writeOut( out, outputPos, "\\u00" + hex( ch ) );
676                         }
677                         else
678                         {
679                             outputPos = writeOut( out, outputPos, "\\u000" + hex( ch ) );
680                         }
681                         break;
682                 }
683             }
684             else
685             {
686                 switch ( ch )
687                 {
688                     case '\'':
689                         out[outputPos++] = '\\';
690                         out[outputPos++] = '\'';
691                         break;
692                     case '"':
693                         out[outputPos++] = '\\';
694                         out[outputPos++] = '"';
695                         break;
696                     case '\\':
697                         out[outputPos++] = '\\';
698                         out[outputPos++] = '\\';
699                         break;
700                     case '/':
701                         out[outputPos++] = '\\';
702                         out[outputPos++] = '/';
703                         break;
704                     default:
705                         out[outputPos++] = (byte) ch;
706                         break;
707                 }
708             }
709         }
710         return outputPos - outoff;
711     }
712 
713     private static int writeOut( ByteBuffer out, int outputPos, final String msg )
714     {
715         byte[] bytes = msg.getBytes();
716         for ( int cnt = 0; cnt < bytes.length; cnt++ )
717         {
718             out.append( bytes[cnt] );
719         }
720         return outputPos;
721     }
722 
723     private static int writeOut( PrintStream out, int outputPos, final String msg )
724     {
725         byte[] bytes = msg.getBytes();
726         for ( int cnt = 0; cnt < bytes.length; cnt++ )
727         {
728             out.write( bytes[cnt] );
729         }
730         return outputPos;
731     }
732 
733 
734     private static int writeOut( byte[] out, int outputPos, final String msg )
735     {
736         byte[] bytes = msg.getBytes();
737         for ( int cnt = 0; cnt < bytes.length; cnt++ )
738         {
739             out[outputPos++] = bytes[cnt];
740         }
741         return outputPos;
742     }
743 
744 
745     public static String hex( char ch )
746     {
747         return Integer.toHexString( ch ).toUpperCase();
748     }
749 
750     /**
751      * Courtesy of commons-lang StringEscapeUtils, slightly modified, see below
752      *
753      * @param str String to escape values in, may be null
754      * @return the escaped string
755      */
756     public static void escapeJavaStyleString( StringBuffer target, String str )
757     {
758         if ( str == null )
759         {
760             return;
761         }
762         try
763         {
764             StringWriter writer = new StringWriter( str.length() * 2 );
765             escapeJavaStyleString( writer, str, true );
766             target.append( writer.toString() ); // todo: be bit smarter
767         }
768         catch ( IOException ioe )
769         {
770             // this should never ever happen while writing to a StringWriter
771             ioe.printStackTrace();
772         }
773     }
774 
775     public static void escapeJavaStyleString( PrintStream target, String str )
776     {
777         if ( str == null )
778         {
779             return;
780         }
781         try
782         {
783             StringWriter writer = new StringWriter( str.length() * 2 );
784             escapeJavaStyleString( writer, str, true );
785             target.append( writer.toString() ); // todo: be bit smarter
786         }
787         catch ( IOException ioe )
788         {
789             // this should never ever happen while writing to a StringWriter
790             ioe.printStackTrace();
791         }
792     }
793 }
794