View Javadoc

1   package org.apache.maven.shared.utils.xml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters &amp; and &lt;, and for attributes
 * &quot; and &apos;. These must be encoded/decoded from/to XML.
 * <p>
 * All methods are static and keep no state between calls.
 */
final class XMLEncode
{

    /**
     * Texts that are not longer than this are never wrapped in a CDATA block; the value equals the
     * number of characters the CDATA markup itself adds.
     */
    private static final int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    /** Quote character used for attribute values when the caller does not supply one. */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /**
     * Checks if this text purely consists of white space, as defined by
     * {@link Character#isWhitespace(char)}.
     *
     * @param text the text to inspect, must not be <code>null</code>
     * @return <code>true</code> if every character is white space (also for the empty string),
     *         <code>false</code> as soon as one character is not
     * @throws NullPointerException if <code>text</code> is <code>null</code>
     */
    public static boolean isWhiteSpace( String text )
    {
        for ( int i = 0; i < text.length(); i++ )
        {
            if ( !Character.isWhitespace( text.charAt( i ) ) )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes.
     *
     * @param text      the raw attribute value, may be <code>null</code>
     * @param quoteChar the character placed before and after the encoded value
     * @return the encoded value including the surrounding quote characters,
     *         or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextForAttribute( String text, char quoteChar )
    {
        // the delegate already maps null to null
        return xmlEncodeTextAsPCDATA( text, true, quoteChar );
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     * <p>
     * Text that does not need encoding is returned unchanged. Otherwise a CDATA block is used when the
     * text is longer than the CDATA markup overhead and can be stored in such a block; in all other
     * cases the text is escaped as PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeText( String text )
    {
        if ( text == null )
        {
            return null;
        }
        if ( !needsEncoding( text ) )
        {
            return text;
        }
        // only encode as CDATA if the text is longer than the CDATA block overhead
        if ( text.length() > CDATA_BLOCK_THRESHOLD_LENGTH )
        {
            String cdata = xmlEncodeTextAsCDATABlock( text );
            if ( cdata != null )
            {
                return cdata;
            }
        }
        // if everything else fails, do it the safe way...
        return xmlEncodeTextAsPCDATA( text );
    }

    /**
     * Encodes any text as PCDATA for use as element content.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the escaped text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text )
    {
        return xmlEncodeTextAsPCDATA( text, false );
    }

    /**
     * Encodes any text as PCDATA.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @return the escaped text (surrounded by double quotes if <code>forAttribute</code> is set),
     *         or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute )
    {
        return xmlEncodeTextAsPCDATA( text, forAttribute, DEFAULT_QUOTE_CHAR );
    }

    /**
     * Encodes any text as PCDATA.
     * <p>
     * {@code &}, {@code <} and {@code >} are always replaced by their entities. When encoding for an
     * attribute, both the double quote and the apostrophe are replaced by entities (independently of
     * <code>quoteChar</code>), a line feed becomes {@code &#10;}, a carriage return that is not followed
     * by a line feed becomes {@code &#13;}, and the carriage return of a CR/LF pair is dropped. When
     * encoding element content, quotes, carriage returns and line feeds are copied unchanged.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @param quoteChar    if this is for attributes this <code>char</code> is used to quote the attribute value
     * @return the escaped text (surrounded by <code>quoteChar</code> if <code>forAttribute</code> is set),
     *         or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        int length = text.length();
        StringBuilder n = new StringBuilder( length * 2 );
        if ( forAttribute )
        {
            n.append( quoteChar );
        }

        for ( int i = 0; i < length; i++ )
        {
            char c = text.charAt( i );
            switch ( c )
            {
                case '&':
                    n.append( "&amp;" );
                    break;
                case '<':
                    n.append( "&lt;" );
                    break;
                case '>': // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append( "&gt;" );
                    break;
                case '"':
                    if ( forAttribute )
                    {
                        n.append( "&quot;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\'':
                    if ( forAttribute )
                    {
                        n.append( "&apos;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\r':
                    if ( forAttribute )
                    {
                        // encode a lone \r, but skip the \r in \r\n (the \n is encoded below)
                        if ( i == ( length - 1 ) || text.charAt( i + 1 ) != '\n' )
                        {
                            n.append( "&#13;" );
                        }
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\n':
                    if ( forAttribute )
                    {
                        n.append( "&#10;" );
                    }
                    else
                    {
                        // element content keeps its line breaks; dropping them would lose data
                        n.append( c );
                    }
                    break;
                default:
                    n.append( c );
                    break;
            }
        }

        if ( forAttribute )
        {
            n.append( quoteChar );
        }

        return n.toString();
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped in a CDATA block, or <code>null</code> if <code>text</code> is
     *         <code>null</code> or cannot be stored in a CDATA block
     * @see #isCompatibleWithCDATABlock(String)
     */
    public static String xmlEncodeTextAsCDATABlock( String text )
    {
        // isCompatibleWithCDATABlock also rejects null
        if ( !isCompatibleWithCDATABlock( text ) )
        {
            return null;
        }
        return "<![CDATA[" + text + "]]>";
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML element content.
     *
     * @param text the text to inspect, may be <code>null</code>
     * @return <code>true</code> if the text cannot be written as is
     */
    public static boolean needsEncoding( String text )
    {
        return needsEncoding( text, false );
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * <p>
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute.
     * <p>
     * {@code &} and {@code <} always need encoding. For attributes, quotes and apostrophes need
     * encoding as well. For element content, the sequence {@code ]]>} needs encoding because it must
     * not appear literally in character data.
     *
     * @param data         the text to inspect, may be <code>null</code>
     * @param checkForAttr <code>true</code> to check for an attribute value,
     *                     <code>false</code> to check for element content
     * @return <code>true</code> if the text cannot be written as is,
     *         <code>false</code> otherwise or if <code>data</code> is <code>null</code>
     */
    public static boolean needsEncoding( String data, boolean checkForAttr )
    {
        if ( data == null )
        {
            return false;
        }
        for ( int i = 0; i < data.length(); i++ )
        {
            char c = data.charAt( i );
            if ( c == '&' || c == '<' )
            {
                return true;
            }
            if ( checkForAttr )
            {
                if ( c == '"' || c == '\'' )
                {
                    return true;
                }
            }
            else if ( c == '>' && i >= 2 && data.charAt( i - 1 ) == ']' && data.charAt( i - 2 ) == ']' )
            {
                // "]]>" is not allowed in character data outside of a CDATA end marker
                return true;
            }
        }
        return false;
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the text to inspect, may be <code>null</code>
     * @return <code>true</code> if <code>text</code> is not <code>null</code> and does not contain
     *         the CDATA end marker {@code ]]>}
     */
    public static boolean isCompatibleWithCDATABlock( String text )
    {
        return text != null && !text.contains( "]]>" );
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'
     * <p>
     * Only the five predefined entities (amp, lt, gt, quot, apos) are decoded. Any other ampersand,
     * including the one that starts a numeric character reference, is copied unchanged.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public static String xmlDecodeTextToCDATA( String pcdata )
    {
        if ( pcdata == null )
        {
            return null;
        }
        StringBuilder n = new StringBuilder( pcdata.length() );
        for ( int i = 0; i < pcdata.length(); i++ )
        {
            char c = pcdata.charAt( i );
            if ( c != '&' )
            {
                n.append( c );
                continue;
            }

            // peek at the (up to) five characters after the ampersand; 0x0 marks "past the end"
            char c1 = lookAhead( 1, i, pcdata );
            char c2 = lookAhead( 2, i, pcdata );
            char c3 = lookAhead( 3, i, pcdata );
            char c4 = lookAhead( 4, i, pcdata );
            char c5 = lookAhead( 5, i, pcdata );

            // on a match, advance i past the entity; the loop increment consumes its last character
            if ( c1 == 'a' && c2 == 'm' && c3 == 'p' && c4 == ';' )
            {
                n.append( '&' );
                i += 4;
            }
            else if ( c1 == 'l' && c2 == 't' && c3 == ';' )
            {
                n.append( '<' );
                i += 3;
            }
            else if ( c1 == 'g' && c2 == 't' && c3 == ';' )
            {
                n.append( '>' );
                i += 3;
            }
            else if ( c1 == 'q' && c2 == 'u' && c3 == 'o' && c4 == 't' && c5 == ';' )
            {
                n.append( '"' );
                i += 5;
            }
            else if ( c1 == 'a' && c2 == 'p' && c3 == 'o' && c4 == 's' && c5 == ';' )
            {
                n.append( '\'' );
                i += 5;
            }
            else
            {
                // not a known entity: keep the ampersand as is
                n.append( '&' );
            }
        }
        return n.toString();
    }

    /**
     * Returns the character <code>la</code> positions after <code>offset</code> in <code>data</code>,
     * or <code>0x0</code> if that position lies outside of the string.
     */
    private static char lookAhead( int la, int offset, String data )
    {
        int index = offset + la;
        return index >= 0 && index < data.length() ? data.charAt( index ) : 0x0;
    }

    /**
     * Checks whether <code>text</code> contains at least one of the given characters.
     * Combines multiple checks in one method for speed. Not called from within this class at present.
     *
     * @return <code>false</code> if either argument is <code>null</code> or <code>chars</code> is empty
     */
    private static boolean contains( String text, char[] chars )
    {
        if ( text == null || chars == null || chars.length == 0 )
        {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ )
        {
            char c = text.charAt( i );
            for ( char aChar : chars )
            {
                if ( aChar == c )
                {
                    return true;
                }
            }
        }
        return false;
    }

}