View Javadoc

1   package org.apache.maven.shared.utils.xml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters &amp; and &lt;, and for attributes
 * &quot; and &apos;. These must be encoded/decoded from/to XML.
 */
final class XMLEncode
{

    /**
     * Texts that are not longer than this are never wrapped into a CDATA block by
     * {@link #xmlEncodeText(String)}; 12 is the length of the CDATA wrapper itself.
     */
    private static final int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    /**
     * Quote character put around attribute values when the caller does not supply one.
     */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /**
     * Checks if this text purely consists of white space characters, i.e. characters for which
     * {@link Character#isWhitespace(char)} returns <code>true</code>.
     *
     * @param text the text to check, must not be <code>null</code>
     * @return <code>true</code> if every character is white space (also for the empty string)
     */
    public static boolean isWhiteSpace( String text )
    {
        for ( int i = 0; i < text.length(); i++ )
        {
            char c = text.charAt( i );
            if ( !Character.isWhitespace( c ) )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes.
     *
     * @param text      the raw attribute value, may be <code>null</code>
     * @param quoteChar the character that is put before and after the encoded value
     * @return the encoded value enclosed in <code>quoteChar</code>, or <code>null</code> if
     *         <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextForAttribute( String text, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, true, quoteChar );
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     * <p>
     * Text for which {@link #needsEncoding(String)} is <code>false</code> is returned unchanged.
     * Otherwise a CDATA block is used if the text is longer than the CDATA wrapper and does not
     * contain the CDATA end marker; in all remaining cases the text is encoded as PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeText( String text )
    {
        if ( text == null )
        {
            return null;
        }
        if ( !needsEncoding( text ) )
        {
            return text;
        }
        // only encode as CDATA if the text is longer than the CDATA block overhead
        if ( text.length() > CDATA_BLOCK_THRESHOLD_LENGTH )
        {
            String cdata = xmlEncodeTextAsCDATABlock( text );
            if ( cdata != null )
            {
                return cdata;
            }
        }
        // if everything else fails, do it the safe way...
        return xmlEncodeTextAsPCDATA( text );
    }

    /**
     * Encodes any text as PCDATA (element content, no attribute specific treatment).
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, false );
    }

    /**
     * Encodes any text as PCDATA, using the default quote character for attributes.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute )
    {
        return xmlEncodeTextAsPCDATA( text, forAttribute, DEFAULT_QUOTE_CHAR );
    }

    /**
     * Encodes any text as PCDATA.
     * <p>
     * The characters &amp;, &lt; and &gt; are always replaced by their entities. If
     * <code>forAttribute</code> is set, quotes and apostrophes are replaced by entities as well,
     * line breaks are written as numeric character references (a CR directly followed by LF is
     * dropped so that CR LF becomes a single <code>{@code &#10;}</code>), and the result is
     * enclosed in <code>quoteChar</code>. Otherwise quotes, apostrophes, CR and LF are copied
     * unchanged.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @param quoteChar    if this is for attributes this <code>char</code> is used to quote the attribute value
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        final int length = text.length();
        StringBuilder n = new StringBuilder( length * 2 );
        if ( forAttribute )
        {
            // opening quote goes in first, which avoids shifting the whole buffer afterwards
            n.append( quoteChar );
        }
        for ( int i = 0; i < length; i++ )
        {
            char c = text.charAt( i );
            switch ( c )
            {
                case '&':
                    n.append( "&amp;" );
                    break;
                case '<':
                    n.append( "&lt;" );
                    break;
                case '>': // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append( "&gt;" );
                    break;
                case '"':
                    if ( forAttribute )
                    {
                        n.append( "&quot;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\'':
                    if ( forAttribute )
                    {
                        n.append( "&apos;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\r':
                    if ( forAttribute )
                    {
                        // skip the \r in \r\n (the following \n is written as a reference);
                        // a \r at the very end of the text has no successor to look at
                        if ( i == length - 1 || text.charAt( i + 1 ) != '\n' )
                        {
                            n.append( "&#13;" );
                        }
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\n':
                    if ( forAttribute )
                    {
                        n.append( "&#10;" );
                    }
                    else
                    {
                        // line feeds are legal in element content and must not get lost
                        n.append( c );
                    }
                    break;
                default:
                    n.append( c );
                    break;
            }
        }

        if ( forAttribute )
        {
            n.append( quoteChar );
        }

        return n.toString();
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped into a CDATA block, or <code>null</code> if <code>text</code> is
     *         <code>null</code> or not {@link #isCompatibleWithCDATABlock(String) compatible}
     */
    public static String xmlEncodeTextAsCDATABlock( String text )
    {
        if ( text == null )
        {
            return null;
        }
        if ( isCompatibleWithCDATABlock( text ) )
        {
            return "<![CDATA[" + text + "]]>";
        }
        return null;
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * Quotes and apostrophes are not taken into account.
     *
     * @param text the text to check, may be <code>null</code>
     * @return <code>true</code> if the text contains &amp; or &lt;
     */
    public static boolean needsEncoding( String text )
    {
        return needsEncoding( text, false );
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * <p>
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute.
     * <p>
     * Note that &gt; is not checked here, so a text whose only special sequence is the CDATA
     * end marker is reported as not needing encoding.
     *
     * @param data         the text to check, may be <code>null</code>
     * @param checkForAttr if quotes and apostrophes should be treated as special characters too
     * @return <code>true</code> if a special character was found, <code>false</code> otherwise
     *         (also for <code>null</code>)
     */
    public static boolean needsEncoding( String data, boolean checkForAttr )
    {
        if ( data == null )
        {
            return false;
        }
        for ( int i = 0; i < data.length(); i++ )
        {
            char c = data.charAt( i );
            if ( c == '&' || c == '<' || ( checkForAttr && ( c == '"' || c == '\'' ) ) )
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the text to check, may be <code>null</code>
     * @return <code>true</code> if the text is not <code>null</code> and does not contain the
     *         CDATA end marker
     */
    public static boolean isCompatibleWithCDATABlock( String text )
    {
        return text != null && ( !text.contains( "]]>" ) );
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'
     * <p>
     * Only the five named entities amp, lt, gt, quot and apos are decoded. Any other ampersand,
     * including the start of a numeric character reference, is copied unchanged.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public static String xmlDecodeTextToCDATA( String pcdata )
    {
        if ( pcdata == null )
        {
            return null;
        }
        StringBuilder n = new StringBuilder( pcdata.length() );
        for ( int i = 0; i < pcdata.length(); i++ )
        {
            char c = pcdata.charAt( i );
            if ( c == '&' )
            {
                char c1 = lookAhead( 1, i, pcdata );
                char c2 = lookAhead( 2, i, pcdata );
                char c3 = lookAhead( 3, i, pcdata );
                char c4 = lookAhead( 4, i, pcdata );
                char c5 = lookAhead( 5, i, pcdata );

                // on a match, i is advanced past the entity; the loop increment consumes the '&'
                if ( c1 == 'a' && c2 == 'm' && c3 == 'p' && c4 == ';' )
                {
                    n.append( "&" );
                    i += 4;
                }
                else if ( c1 == 'l' && c2 == 't' && c3 == ';' )
                {
                    n.append( "<" );
                    i += 3;
                }
                else if ( c1 == 'g' && c2 == 't' && c3 == ';' )
                {
                    n.append( ">" );
                    i += 3;
                }
                else if ( c1 == 'q' && c2 == 'u' && c3 == 'o' && c4 == 't' && c5 == ';' )
                {
                    n.append( "\"" );
                    i += 5;
                }
                else if ( c1 == 'a' && c2 == 'p' && c3 == 'o' && c4 == 's' && c5 == ';' )
                {
                    n.append( "'" );
                    i += 5;
                }
                else
                {
                    n.append( "&" );
                }
            }
            else
            {
                n.append( c );
            }
        }
        return n.toString();
    }

    /**
     * Returns the character <code>la</code> positions after <code>offset</code>.
     *
     * @param la     how many characters to look ahead
     * @param offset the current position within <code>data</code>
     * @param data   the text to read from
     * @return the character at <code>offset + la</code>, or the NUL character if that position
     *         lies outside of <code>data</code>
     */
    private static char lookAhead( int la, int offset, String data )
    {
        int index = offset + la;
        if ( index < 0 || index >= data.length() )
        {
            return 0x0;
        }
        return data.charAt( index );
    }

    /**
     * Checks whether the text contains at least one of the given characters.
     * Combines multiple checks in one method for speed. Not referenced from within this class.
     *
     * @param text  the text to scan, may be <code>null</code>
     * @param chars the characters to look for, may be <code>null</code> or empty
     * @return <code>true</code> if any character of <code>chars</code> occurs in <code>text</code>
     */
    private static boolean contains( String text, char[] chars )
    {
        if ( text == null || chars == null || chars.length == 0 )
        {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ )
        {
            char c = text.charAt( i );
            for ( char aChar : chars )
            {
                if ( aChar == c )
                {
                    return true;
                }
            }
        }
        return false;
    }

}