1 package org.apache.maven.surefire.util.internal;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.nio.ByteBuffer;
23 import java.nio.CharBuffer;
24 import java.nio.charset.CharacterCodingException;
25 import java.nio.charset.Charset;
26 import java.util.StringTokenizer;
27
28 /**
29 * <p>
30 * Common {@link String java.lang.String} manipulation routines.
31 * </p>
32 * <br>
33 * <p>
34 * Originally from <a href="http://jakarta.apache.org/turbine/">Turbine</a> and the GenerationJavaCore library.
35 * </p>
36 * <br>
37 * NOTE: This class is not part of any api and is public purely for technical reasons !
38 *
39 * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
40 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
41 * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
42 * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
43 * @author <a href="mailto:ed@codehaus.org">Ed Korthof</a>
44 * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
45 * @author Stephen Colebourne
46 * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
47 * @author Holger Krauth
48 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
49 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
50 * @version $Id: StringUtils.java 8001 2009-01-03 13:17:09Z vsiveton $
51 * @since 1.0
52 */
53 public final class StringUtils
54 {
55 public static final String NL = System.getProperty( "line.separator" );
56
57 private static final byte[] HEX_CHARS = {
58 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
59
60 private static final Charset DEFAULT_CHARSET = Charset.defaultCharset();
61
62 /**
63 * TODO
64 * Use JDK7 StandardCharsets
65 */
66 public static final Charset US_ASCII = Charset.forName( "US-ASCII" );
67
68 // 8-bit charset Latin-1
69 public static final Charset ISO_8859_1 = Charset.forName( "ISO-8859-1" );
70
71 public static final Charset UTF_8 = Charset.forName( "UTF-8" );
72
73 private StringUtils()
74 {
75 throw new IllegalStateException( "no instantiable constructor" );
76 }
77
78 public static String[] split( String text, String separator )
79 {
80 final StringTokenizer tok;
81 if ( separator == null )
82 {
83 // Null separator means we're using StringTokenizer's default
84 // delimiter, which comprises all whitespace characters.
85 tok = new StringTokenizer( text );
86 }
87 else
88 {
89 tok = new StringTokenizer( text, separator );
90 }
91
92 String[] list = new String[tok.countTokens()];
93 for ( int i = 0; tok.hasMoreTokens(); i++ )
94 {
95 list[i] = tok.nextToken();
96 }
97 return list;
98 }
99
100 /**
101 * <p>
102 * Checks if a (trimmed) String is {@code null} or blank.
103 * </p>
104 *
105 * @param str the String to check
106 * @return {@code true} if the String is {@code null}, or length zero once trimmed
107 */
108 public static boolean isBlank( String str )
109 {
110 return str == null || str.trim().isEmpty();
111 }
112
113 /**
114 * <p>
115 * Checks if a (trimmed) String is not {@code null} and not blank.
116 * </p>
117 *
118 * @param str the String to check
119 * @return {@code true} if the String is not {@code null} and length of trimmed {@code str} is not zero.
120 */
121 public static boolean isNotBlank( String str )
122 {
123 return !isBlank( str );
124 }
125
126 /**
127 * Escape the specified string to a representation that only consists of nicely printable characters, without any
128 * newlines and without a comma.
129 * <p>
130 * The reverse-method is {@link #unescapeString(StringBuilder, CharSequence)}.
131 *
132 * @param target target string buffer. The required space will be up to {@code str.getBytes().length * 5} chars.
133 * @param str String to escape values in, may be {@code null}.
134 */
135 @SuppressWarnings( "checkstyle:magicnumber" )
136 public static void escapeToPrintable( StringBuilder target, CharSequence str )
137 {
138 if ( target == null )
139 {
140 throw new IllegalArgumentException( "The target buffer must not be null" );
141 }
142 if ( str == null )
143 {
144 return;
145 }
146
147 for ( int i = 0; i < str.length(); i++ )
148 {
149 char c = str.charAt( i );
150
151 // handle non-nicely printable chars and the comma
152 if ( c < 32 || c > 126 || c == '\\' || c == ',' )
153 {
154 target.append( '\\' );
155 target.append( (char) HEX_CHARS[( 0xF000 & c ) >> 12] );
156 target.append( (char) HEX_CHARS[( 0x0F00 & c ) >> 8] );
157 target.append( (char) HEX_CHARS[( 0x00F0 & c ) >> 4] );
158 target.append( (char) HEX_CHARS[( 0x000F & c )] );
159 }
160 else
161 {
162 target.append( c );
163 }
164 }
165 }
166
167 /**
168 * Reverses the effect of {@link #escapeToPrintable(StringBuilder, CharSequence)}.
169 *
170 * @param target target string buffer
171 * @param str the String to un-escape, as created by {@link #escapeToPrintable(StringBuilder, CharSequence)}
172 */
173 public static void unescapeString( StringBuilder target, CharSequence str )
174 {
175 if ( target == null )
176 {
177 throw new IllegalArgumentException( "The target buffer must not be null" );
178 }
179 if ( str == null )
180 {
181 return;
182 }
183
184 for ( int i = 0; i < str.length(); i++ )
185 {
186 char ch = str.charAt( i );
187
188 if ( ch == '\\' )
189 {
190 target.append( (char) (
191 digit( str.charAt( ++i ) ) << 12
192 | digit( str.charAt( ++i ) ) << 8
193 | digit( str.charAt( ++i ) ) << 4
194 | digit( str.charAt( ++i ) )
195 ) );
196 }
197 else
198 {
199 target.append( ch );
200 }
201 }
202 }
203
204 private static int digit( char ch )
205 {
206 if ( ch >= 'a' )
207 {
208 return 10 + ch - 'a';
209 }
210 else if ( ch >= 'A' )
211 {
212 return 10 + ch - 'A';
213 }
214 else
215 {
216 return ch - '0';
217 }
218 }
219
220 /**
221 * Escapes the bytes in the array {@code input} to contain only 'printable' bytes.
222 * <br>
223 * Escaping is done by encoding the non-nicely printable bytes to {@code '\' + upperCaseHexBytes(byte)}.
224 * <br>
225 * The reverse-method is {@link #unescapeBytes(String, String)}.
226 * <br>
227 * The returned byte array is started with aligned sequence {@code header} and finished by {@code \n}.
228 *
229 * @param header prefix header
230 * @param input input buffer
231 * @param off offset in the input buffer
232 * @param len number of bytes to copy from the input buffer
233 * @return number of bytes written to {@code out}
234 * @throws NullPointerException if the specified parameter {@code header} or {@code input} is null
235 * @throws IndexOutOfBoundsException if {@code off} or {@code len} is out of range
236 * ({@code off < 0 || len < 0 || off >= input.length || len > input.length || off + len > input.length})
237 */
238 @SuppressWarnings( "checkstyle:magicnumber" )
239 public static EncodedArray escapeBytesToPrintable( final byte[] header, final byte[] input, final int off,
240 final int len )
241 {
242 if ( input.length == 0 )
243 {
244 return EncodedArray.EMPTY;
245 }
246 if ( off < 0 || len < 0 || off >= input.length || len > input.length || off + len > input.length )
247 {
248 throw new IndexOutOfBoundsException(
249 "off < 0 || len < 0 || off >= input.length || len > input.length || off + len > input.length" );
250 }
251 // Hex-escaping can be up to 3 times length of a regular byte. Last character is '\n', see (+1).
252 final byte[] encodeBytes = new byte[header.length + 3 * len + 1];
253 System.arraycopy( header, 0, encodeBytes, 0, header.length );
254 int outputPos = header.length;
255 final int end = off + len;
256 for ( int i = off; i < end; i++ )
257 {
258 final byte b = input[i];
259
260 // handle non-nicely printable bytes
261 if ( b < 32 || b > 126 || b == '\\' || b == ',' )
262 {
263 final int upper = ( 0xF0 & b ) >> 4;
264 final int lower = ( 0x0F & b );
265 encodeBytes[outputPos++] = '\\';
266 encodeBytes[outputPos++] = HEX_CHARS[upper];
267 encodeBytes[outputPos++] = HEX_CHARS[lower];
268 }
269 else
270 {
271 encodeBytes[outputPos++] = b;
272 }
273 }
274 encodeBytes[outputPos++] = (byte) '\n';
275
276 return new EncodedArray( encodeBytes, outputPos );
277 }
278
279 /**
280 * Reverses the effect of {@link #escapeBytesToPrintable(byte[], byte[], int, int)}.
281 *
282 * @param str the input String
283 * @param charsetName the charset name
284 * @return the number of bytes written to {@code out}
285 */
286 public static ByteBuffer unescapeBytes( String str, String charsetName )
287 {
288 int outPos = 0;
289
290 if ( str == null )
291 {
292 return ByteBuffer.wrap( new byte[0] );
293 }
294
295 byte[] out = new byte[str.length()];
296 for ( int i = 0; i < str.length(); i++ )
297 {
298 char ch = str.charAt( i );
299
300 if ( ch == '\\' )
301 {
302 int upper = digit( str.charAt( ++i ) );
303 int lower = digit( str.charAt( ++i ) );
304 out[outPos++] = (byte) ( upper << 4 | lower );
305 }
306 else
307 {
308 out[outPos++] = (byte) ch;
309 }
310 }
311
312 Charset sourceCharset = Charset.forName( charsetName );
313 if ( !DEFAULT_CHARSET.equals( sourceCharset ) )
314 {
315 CharBuffer decodedFromSourceCharset;
316 try
317 {
318 decodedFromSourceCharset = sourceCharset.newDecoder().decode( ByteBuffer.wrap( out, 0, outPos ) );
319 return DEFAULT_CHARSET.encode( decodedFromSourceCharset );
320 }
321 catch ( CharacterCodingException e )
322 {
323 // ignore and fall through to the non-recoded version
324 }
325 }
326
327 return ByteBuffer.wrap( out, 0, outPos );
328 }
329
330 public static byte[] encodeStringForForkCommunication( String string )
331 {
332 return string.getBytes( ISO_8859_1 );
333 }
334
335 /**
336 * Determines if {@code buffer} starts with specific literal(s).
337 *
338 * @param buffer Examined StringBuffer
339 * @param pattern a pattern which should start in {@code buffer}
340 * @return {@code true} if buffer's literal starts with given {@code pattern}, or both are empty.
341 */
342 public static boolean startsWith( StringBuffer buffer, String pattern )
343 {
344 if ( buffer.length() < pattern.length() )
345 {
346 return false;
347 }
348 else
349 {
350 for ( int i = 0, len = pattern.length(); i < len; i++ )
351 {
352 if ( buffer.charAt( i ) != pattern.charAt( i ) )
353 {
354 return false;
355 }
356 }
357 return true;
358 }
359 }
360
361 /**
362 * Escaped string to byte array with offset 0 and certain length.
363 */
364 public static final class EncodedArray
365 {
366 private static final EncodedArray EMPTY = new EncodedArray( new byte[]{}, 0 );
367
368 private final byte[] array;
369 private final int size;
370
371 private EncodedArray( byte[] array, int size )
372 {
373 this.array = array;
374 this.size = size;
375 }
376
377 public byte[] getArray()
378 {
379 return array;
380 }
381
382 public int getSize()
383 {
384 return size;
385 }
386 }
387 }