1 package org.apache.maven.surefire.util.internal;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.io.UnsupportedEncodingException;
23 import java.nio.ByteBuffer;
24 import java.nio.CharBuffer;
25 import java.nio.charset.CharacterCodingException;
26 import java.nio.charset.Charset;
27 import java.util.StringTokenizer;
28
/**
 * <p>
 * Common <code>String</code> manipulation routines.
 * </p>
 * <p>
 * Originally from <a href="http://jakarta.apache.org/turbine/">Turbine</a> and the GenerationJavaCore library.
 * </p>
 * <p>
 * A quick borrow from plexus-utils by Kristian Rosenvold, to restore jdk1.3 compat. Threw away all the
 * unused stuff.
 * </p>
 * <p>
 * NOTE: This class is not part of any api and is public purely for technical reasons !
 * </p>
 *
 * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
 * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
 * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
 * @author <a href="mailto:ed@codehaus.org">Ed Korthof</a>
 * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
 * @author Stephen Colebourne
 * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
 * @author Holger Krauth
 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 * @version $Id: StringUtils.java 8001 2009-01-03 13:17:09Z vsiveton $
 * @noinspection JavaDoc
 * @since 1.0
 */
public class StringUtils
{
    /** Upper-case hexadecimal digits as ASCII bytes, indexed by nibble value. */
    private static final byte[] HEX_CHARS = new byte[] {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'A', 'B', 'C', 'D', 'E', 'F' };

    /** The platform default charset, resolved once when this class is initialized. */
    private static final Charset DEFAULT_CHARSET = Charset.defaultCharset();

    // 8-bit charset Latin-1
    public static final String FORK_STREAM_CHARSET_NAME = "ISO-8859-1";

    /**
     * Splits {@code text} into tokens by means of a {@link StringTokenizer}.
     * <p>
     * Each character of {@code separator} acts as a delimiter of its own, and the tokenizer never produces
     * empty tokens, so consecutive delimiters are collapsed.
     *
     * @param text      the text to split, must not be {@code null}
     * @param separator the delimiter characters; {@code null} selects the default delimiters of
     *                  {@link StringTokenizer} (whitespace)
     * @return the tokens in order of appearance, an empty array if there are none
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String[] split( String text, String separator )
    {
        // Null separator means we're using StringTokenizer's default
        // delimiter, which comprises all whitespace characters.
        StringTokenizer tok =
            separator == null ? new StringTokenizer( text ) : new StringTokenizer( text, separator );

        String[] list = new String[tok.countTokens()];
        for ( int i = 0; tok.hasMoreTokens(); i++ )
        {
            list[i] = tok.nextToken();
        }
        return list;
    }

    /**
     * <p>
     * Checks if a (trimmed) String is <code>null</code> or blank.
     * </p>
     *
     * @param str the String to check
     * @return <code>true</code> if the String is <code>null</code>, or length zero once trimmed
     */
    public static boolean isBlank( String str )
    {
        return str == null || str.trim().length() == 0;
    }

    /**
     * <p>
     * Checks if a (trimmed) String is not <code>null</code> and not blank.
     * </p>
     *
     * @param str the String to check
     * @return <code>true</code> if the String is not <code>null</code> and length of trimmed
     * <code>str</code> is not zero.
     */
    public static boolean isNotBlank( String str )
    {
        return !isBlank( str );
    }

    /**
     * Escape the specified string to a representation that only consists of nicely printable characters, without any
     * newlines and without a comma.
     * <p>
     * Every char below 32 or above 126, the backslash and the comma are written as a backslash followed by the
     * four upper-case hex digits of the char value; all other chars are copied unchanged.
     * <p>
     * The reverse-method is {@link #unescapeString(StringBuilder, CharSequence)}.
     *
     * @param target target string buffer. The required space will be up to {@code str.length() * 5} chars.
     * @param str    String to escape values in, may be {@code null}.
     * @throws IllegalArgumentException if {@code target} is {@code null}
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    public static void escapeToPrintable( StringBuilder target, CharSequence str )
    {
        if ( target == null )
        {
            throw new IllegalArgumentException( "The target buffer must not be null" );
        }
        if ( str == null )
        {
            return;
        }

        for ( int i = 0; i < str.length(); i++ )
        {
            char c = str.charAt( i );

            // handle non-nicely printable chars and the comma
            if ( c < 32 || c > 126 || c == '\\' || c == ',' )
            {
                target.append( '\\' );
                target.append( (char) HEX_CHARS[( 0xF000 & c ) >> 12] );
                target.append( (char) HEX_CHARS[( 0x0F00 & c ) >> 8] );
                target.append( (char) HEX_CHARS[( 0x00F0 & c ) >> 4] );
                target.append( (char) HEX_CHARS[( 0x000F & c )] );
            }
            else
            {
                target.append( c );
            }
        }
    }

    /**
     * Reverses the effect of {@link #escapeToPrintable(StringBuilder, CharSequence)}.
     * <p>
     * The input is not validated: each backslash is expected to be followed by exactly four hex digits. A
     * backslash followed by fewer than four chars makes {@code str.charAt} fail with an
     * {@link IndexOutOfBoundsException}.
     *
     * @param target target string buffer
     * @param str    the String to un-escape, as created by {@link #escapeToPrintable(StringBuilder, CharSequence)},
     *               may be {@code null}
     * @throws IllegalArgumentException if {@code target} is {@code null}
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    public static void unescapeString( StringBuilder target, CharSequence str )
    {
        if ( target == null )
        {
            throw new IllegalArgumentException( "The target buffer must not be null" );
        }
        if ( str == null )
        {
            return;
        }

        for ( int i = 0; i < str.length(); i++ )
        {
            char ch = str.charAt( i );

            if ( ch == '\\' )
            {
                // consume the four hex digits following the backslash, most significant nibble first
                target.append( (char) (
                    digit( str.charAt( ++i ) ) << 12
                        | digit( str.charAt( ++i ) ) << 8
                        | digit( str.charAt( ++i ) ) << 4
                        | digit( str.charAt( ++i ) )
                ) );
            }
            else
            {
                target.append( ch );
            }
        }
    }

    /**
     * Converts a hexadecimal digit char to its numeric value.
     * <p>
     * No validation is performed: for chars other than {@code 0-9}, {@code A-F} and {@code a-f} the result is
     * outside the range {@code 0..15}.
     *
     * @param ch the hex digit, upper or lower case
     * @return the numeric value of {@code ch}
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    private static int digit( char ch )
    {
        if ( ch >= 'a' )
        {
            return 10 + ch - 'a';
        }
        else if ( ch >= 'A' )
        {
            return 10 + ch - 'A';
        }
        else
        {
            return ch - '0';
        }
    }

    /**
     * Escapes the bytes in the array {@code input} to contain only 'printable' bytes.
     * <p>
     * Escaping is done by encoding the non-nicely printable bytes to {@code '\' + upperCaseHexBytes(byte)}.
     * Bytes below 32 (which includes every byte with the high bit set, as bytes are signed), the byte 127, the
     * backslash and the comma are escaped; all other bytes are copied unchanged.
     * <p>
     * A safe length of {@code out} is {@code len * 3 + outoff}.
     * <p>
     * The reverse-method is {@link #unescapeBytes(String, String)}.
     *
     * @param out    output buffer
     * @param outoff offset in the output buffer
     * @param input  input buffer; nothing is written if it is {@code null} or empty
     * @param off    offset in the input buffer
     * @param len    number of bytes to copy from the input buffer
     * @return number of bytes written to {@code out}
     * @throws IllegalArgumentException if {@code out} is {@code null}
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    public static int escapeBytesToPrintable( byte[] out, int outoff, byte[] input, int off, int len )
    {
        if ( out == null )
        {
            throw new IllegalArgumentException( "The output array must not be null" );
        }
        if ( input == null || input.length == 0 )
        {
            return 0;
        }
        int outputPos = outoff;
        int end = off + len;
        for ( int i = off; i < end; i++ )
        {
            byte b = input[i];

            // handle non-nicely printable bytes
            if ( b < 32 || b > 126 || b == '\\' || b == ',' )
            {
                // masking first turns the (possibly negative) byte into a non-negative nibble index
                int upper = ( 0xF0 & b ) >> 4;
                int lower = ( 0x0F & b );
                out[outputPos++] = '\\';
                out[outputPos++] = HEX_CHARS[upper];
                out[outputPos++] = HEX_CHARS[lower];
            }
            else
            {
                out[outputPos++] = b;
            }
        }

        return outputPos - outoff;
    }

    /**
     * Reverses the effect of {@link #escapeBytesToPrintable(byte[], int, byte[], int, int)}.
     * <p>
     * Each backslash is expected to be followed by exactly two hex digits; every other char is narrowed to a
     * single byte. If {@code charsetName} denotes a charset different from the platform default charset, the
     * un-escaped bytes are decoded with that charset and re-encoded with the platform default charset. If they
     * cannot be decoded, the un-escaped bytes are returned without re-encoding.
     *
     * @param str         the input String, may be {@code null}
     * @param charsetName the name of the charset the un-escaped bytes are encoded in
     * @return a buffer holding the un-escaped bytes between its position and its limit; an empty buffer if
     * {@code str} is {@code null}
     */
    @SuppressWarnings( "checkstyle:magicnumber" )
    public static ByteBuffer unescapeBytes( String str, String charsetName )
    {
        if ( str == null )
        {
            return ByteBuffer.wrap( new byte[0] );
        }

        int outPos = 0;
        // un-escaping never grows the data, so the input length is an upper bound
        byte[] out = new byte[str.length()];
        for ( int i = 0; i < str.length(); i++ )
        {
            char ch = str.charAt( i );

            if ( ch == '\\' )
            {
                int upper = digit( str.charAt( ++i ) );
                int lower = digit( str.charAt( ++i ) );
                out[outPos++] = (byte) ( upper << 4 | lower );
            }
            else
            {
                out[outPos++] = (byte) ch;
            }
        }

        Charset sourceCharset = Charset.forName( charsetName );
        if ( !DEFAULT_CHARSET.equals( sourceCharset ) )
        {
            try
            {
                CharBuffer decodedFromSourceCharset =
                    sourceCharset.newDecoder().decode( ByteBuffer.wrap( out, 0, outPos ) );
                return DEFAULT_CHARSET.encode( decodedFromSourceCharset );
            }
            catch ( CharacterCodingException e )
            {
                // ignore and fall through to the non-recoded version
            }
        }

        return ByteBuffer.wrap( out, 0, outPos );
    }

    /**
     * Decodes the bytes into a String using the given charset.
     *
     * @param toDecode the bytes to decode
     * @param charset  the charset of {@code toDecode}
     * @return the decoded String
     * @throws RuntimeException if the JVM does not support the charset by its name
     */
    public static String decode( byte[] toDecode, Charset charset )
    {
        try
        {
            // @todo use new JDK 1.6 constructor String(byte bytes[], Charset charset)
            return new String( toDecode, charset.name() );
        }
        catch ( UnsupportedEncodingException e )
        {
            throw new RuntimeException( "The JVM must support Charset " + charset, e );
        }
    }

    /**
     * Encodes the String into bytes using the given charset.
     *
     * @param toEncode the String to encode
     * @param charset  the charset to encode with
     * @return the encoded bytes
     * @throws RuntimeException if the JVM does not support the charset by its name
     */
    public static byte[] encode( String toEncode, Charset charset )
    {
        try
        {
            // @todo use new JDK 1.6 method getBytes(Charset charset)
            return toEncode.getBytes( charset.name() );
        }
        catch ( UnsupportedEncodingException e )
        {
            throw new RuntimeException( "The JVM must support Charset " + charset, e );
        }
    }

    /**
     * Encodes the String into bytes using the charset {@link #FORK_STREAM_CHARSET_NAME}.
     *
     * @param string the String to encode
     * @return the encoded bytes
     * @throws RuntimeException if the JVM does not support {@link #FORK_STREAM_CHARSET_NAME}
     */
    public static byte[] encodeStringForForkCommunication( String string )
    {
        try
        {
            return string.getBytes( FORK_STREAM_CHARSET_NAME );
        }
        catch ( UnsupportedEncodingException e )
        {
            throw new RuntimeException( "The JVM must support Charset " + FORK_STREAM_CHARSET_NAME, e );
        }
    }

    /**
     * Checks that the given reference is not {@code null}.
     * <p>
     * In JDK7 use java.util.Objects instead.
     *
     * @param obj     the reference to check
     * @param message the message of the exception thrown if {@code obj} is {@code null}
     * @param <T>     the type of the reference
     * @return {@code obj} if it is not {@code null}
     * @throws NullPointerException if {@code obj} is {@code null}
     */
    public static <T> T requireNonNull( T obj, String message )
    {
        if ( obj == null )
        {
            throw new NullPointerException( message );
        }
        return obj;
    }

    /**
     * Checks that the given reference is not {@code null}; the exception thrown otherwise has no message.
     * <p>
     * In JDK7 use java.util.Objects instead.
     *
     * @param obj the reference to check
     * @param <T> the type of the reference
     * @return {@code obj} if it is not {@code null}
     * @throws NullPointerException if {@code obj} is {@code null}
     */
    public static <T> T requireNonNull( T obj )
    {
        return requireNonNull( obj, null );
    }
}