1 package org.apache.maven.shared.utils.xml;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters {@code &} and {@code <}, and for attributes
 * {@code "} and {@code '}. These must be encoded/decoded from/to XML.
 */
final class XMLEncode
{

    /** Text must be longer than this before wrapping it in a CDATA block is considered. */
    private static final int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    /** Quote character used for attribute values when the caller does not supply one. */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /** Entity references understood by {@link #xmlDecodeTextToCDATA(String)}. */
    private static final String[] ENTITY_REFS = { "&amp;", "&lt;", "&gt;", "&quot;", "&apos;" };

    /** Characters the entries of {@link #ENTITY_REFS} decode to (same index). */
    private static final char[] ENTITY_CHARS = { '&', '<', '>', '"', '\'' };

    /**
     * Checks if this text purely consists of white space characters, as defined by
     * {@link Character#isWhitespace(char)}.
     *
     * @param text the text to inspect; must not be <code>null</code>
     * @return <code>true</code> if every character is white space (also for the empty string)
     * @throws NullPointerException if <code>text</code> is <code>null</code>
     */
    public static boolean isWhiteSpace( String text )
    {
        for ( int i = 0; i < text.length(); i++ )
        {
            if ( !Character.isWhitespace( text.charAt( i ) ) )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes.
     *
     * @param text the raw attribute value, may be <code>null</code>
     * @param quoteChar the character placed before and after the encoded value
     * @return the encoded value surrounded by <code>quoteChar</code>, or <code>null</code> if
     *         <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextForAttribute( String text, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, true, quoteChar );
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     * <p>
     * Text for which {@link #needsEncoding(String)} is <code>false</code> is returned unchanged. Otherwise a
     * CDATA block is used when the text is longer than the CDATA overhead and does not contain
     * {@code ]]>}; in all remaining cases the text is escaped as PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeText( String text )
    {
        if ( text == null )
        {
            return null;
        }
        if ( !needsEncoding( text ) )
        {
            return text;
        }
        // only encode as CDATA if the text is longer than the CDATA block overhead
        if ( text.length() > CDATA_BLOCK_THRESHOLD_LENGTH )
        {
            String cdata = xmlEncodeTextAsCDATABlock( text );
            if ( cdata != null )
            {
                return cdata;
            }
        }
        // if everything else fails, do it the safe way...
        return xmlEncodeTextAsPCDATA( text );
    }

    /**
     * Encodes any text as PCDATA (element content, no attribute specific escaping).
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, false );
    }

    /**
     * Encodes any text as PCDATA, using the default quote character for attributes.
     *
     * @param text the raw text, may be <code>null</code>
     * @param forAttribute if you want
     * quotes and apostrophes specially treated for attributes
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute )
    {
        return xmlEncodeTextAsPCDATA( text, forAttribute, DEFAULT_QUOTE_CHAR );
    }

    /**
     * Encodes any text as PCDATA.
     * <p>
     * {@code &}, {@code <} and {@code >} are always replaced by their entity references. When encoding for
     * an attribute, quotes and apostrophes are escaped as well, line breaks are written as character
     * references (a CR that is immediately followed by LF is dropped, so CR LF becomes a single
     * {@code &#10;}), and the result is wrapped in <code>quoteChar</code>. When not encoding for an
     * attribute, quotes, apostrophes and line breaks are copied unchanged.
     *
     * @param text the raw text, may be <code>null</code>
     * @param forAttribute if you want
     * quotes and apostrophes specially treated for attributes
     * @param quoteChar if this is for attributes this <code>char</code> is used to quote the attribute value
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        final int length = text.length();
        StringBuilder n = new StringBuilder( length * 2 + 2 );
        if ( forAttribute )
        {
            // opening quote goes first so nothing has to be inserted at the front later
            n.append( quoteChar );
        }
        for ( int i = 0; i < length; i++ )
        {
            char c = text.charAt( i );
            switch ( c )
            {
                case '&':
                    n.append( "&amp;" );
                    break;
                case '<':
                    n.append( "&lt;" );
                    break;
                case '>': // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append( "&gt;" );
                    break;
                case '"':
                    if ( forAttribute )
                    {
                        n.append( "&quot;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\'':
                    if ( forAttribute )
                    {
                        n.append( "&apos;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\r':
                    if ( forAttribute )
                    {
                        // skip the \r in \r\n; the bounds check keeps a trailing \r from reading past the end
                        boolean followedByLf = i + 1 < length && text.charAt( i + 1 ) == '\n';
                        if ( !followedByLf )
                        {
                            n.append( "&#13;" );
                        }
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\n':
                    if ( forAttribute )
                    {
                        n.append( "&#10;" );
                    }
                    else
                    {
                        // a line feed is legal in element content and must not be lost
                        n.append( c );
                    }
                    break;
                default:
                    n.append( c );
                    break;
            }
        }

        if ( forAttribute )
        {
            n.append( quoteChar );
        }

        return n.toString();
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped in a CDATA section, or <code>null</code> if <code>text</code> is
     *         <code>null</code> or contains {@code ]]>}
     */
    public static String xmlEncodeTextAsCDATABlock( String text )
    {
        if ( text == null )
        {
            return null;
        }
        if ( isCompatibleWithCDATABlock( text ) )
        {
            return "<![CDATA[" + text + "]]>";
        }
        else
        {
            return null;
        }
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML element content.
     *
     * @param text the text to inspect, may be <code>null</code>
     * @return <code>true</code> if the text contains {@code &} or {@code <}
     */
    public static boolean needsEncoding( String text )
    {
        return needsEncoding( text, false );
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * <p>
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute.
     * <p>
     * Only {@code &} and {@code <} (plus {@code "} and {@code '} for attributes) are looked for;
     * {@code >} and line breaks are not considered by this check.
     *
     * @param data the text to inspect, may be <code>null</code>
     * @param checkForAttr whether quotes and apostrophes also count as needing encoding
     * @return <code>true</code> if one of the checked characters occurs, <code>false</code> otherwise
     *         (including for <code>null</code>)
     */
    public static boolean needsEncoding( String data, boolean checkForAttr )
    {
        if ( data == null )
        {
            return false;
        }
        for ( int i = 0; i < data.length(); i++ )
        {
            char c = data.charAt( i );
            if ( c == '&' || c == '<' || ( checkForAttr && ( c == '"' || c == '\'' ) ) )
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the text to inspect, may be <code>null</code>
     * @return <code>true</code> if the text is not <code>null</code> and does not contain {@code ]]>}
     */
    public static boolean isCompatibleWithCDATABlock( String text )
    {
        return text != null && ( !text.contains( "]]>" ) );
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '{@code &}' out of '{@code &amp;}'
     * <p>
     * Only the five entity references {@code &amp;}, {@code &lt;}, {@code &gt;}, {@code &quot;} and
     * {@code &apos;} are replaced; any other {@code &} is copied through unchanged.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public static String xmlDecodeTextToCDATA( String pcdata )
    {
        if ( pcdata == null )
        {
            return null;
        }
        StringBuilder n = new StringBuilder( pcdata.length() );
        int i = 0;
        while ( i < pcdata.length() )
        {
            char c = pcdata.charAt( i );
            int consumed = 1;
            if ( c == '&' )
            {
                for ( int e = 0; e < ENTITY_REFS.length; e++ )
                {
                    // startsWith with an offset is bounds safe, so truncated references simply do not match
                    if ( pcdata.startsWith( ENTITY_REFS[e], i ) )
                    {
                        c = ENTITY_CHARS[e];
                        consumed = ENTITY_REFS[e].length();
                        break;
                    }
                }
            }
            n.append( c );
            i += consumed;
        }
        return n.toString();
    }

    /**
     * Tells whether <code>text</code> contains at least one of the given characters.
     * Combines multiple checks in one method for speed.
     *
     * @param text the text to scan, may be <code>null</code>
     * @param chars the characters to look for, may be <code>null</code> or empty
     * @return <code>true</code> if any character of <code>chars</code> occurs in <code>text</code>
     */
    private static boolean contains( String text, char[] chars )
    {
        if ( text == null || chars == null || chars.length == 0 )
        {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ )
        {
            char c = text.charAt( i );
            for ( char aChar : chars )
            {
                if ( aChar == c )
                {
                    return true;
                }
            }
        }
        return false;
    }

}