View Javadoc
1   package org.apache.maven.shared.utils.xml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters &amp; and &lt;, and for attributes
 * &quot; and &apos;. These must be encoded/decoded from/to XML.
 */
final class XMLEncode
{

    /** Texts that are not longer than this are never wrapped into a CDATA block. */
    private static final int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    /** Quote character used around attribute values when the caller does not specify one. */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /** Character sequence that terminates a CDATA block and must not appear unescaped in content. */
    private static final String CDATA_END = "]]>";

    private XMLEncode()
    {
        // static utility class, not meant to be instantiated
    }

    /**
     * Checks if this text purely consists of white space characters as defined by
     * {@link Character#isWhitespace(char)} (this includes ' ', TAB and NEWLINE).
     *
     * @param text the text to check, must not be <code>null</code>
     * @return <code>true</code> if every character is white space; also <code>true</code> for the empty string
     */
    public static boolean isWhiteSpace( String text )
    {
        for ( int i = 0; i < text.length(); i++ )
        {
            if ( !Character.isWhitespace( text.charAt( i ) ) )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes.
     *
     * @param text      the raw attribute value, may be <code>null</code>
     * @param quoteChar the character that is put around the encoded value
     * @return the encoded value including the surrounding quote characters,
     *         or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextForAttribute( String text, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, true, quoteChar );
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text unchanged if it needs no encoding, a CDATA block if the text is longer than the
     *         CDATA threshold and can be stored in one, the PCDATA encoded text otherwise;
     *         <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeText( String text )
    {
        if ( text == null )
        {
            return null;
        }
        if ( !needsEncoding( text ) )
        {
            return text;
        }

        // only encode as CDATA if the text is longer than the CDATA block overhead
        if ( text.length() > CDATA_BLOCK_THRESHOLD_LENGTH )
        {
            String cdata = xmlEncodeTextAsCDATABlock( text );
            if ( cdata != null )
            {
                return cdata;
            }
        }

        // if everything else fails, do it the safe way...
        return xmlEncodeTextAsPCDATA( text );
    }

    /**
     * Encodes any text as PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, false );
    }

    /**
     * Encodes any text as PCDATA.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute )
    {
        return xmlEncodeTextAsPCDATA( text, forAttribute, DEFAULT_QUOTE_CHAR );
    }

    /**
     * Encodes any text as PCDATA.
     * <p>
     * The characters &amp;, &lt; and &gt; are always replaced by their entities. For attributes the result is
     * additionally wrapped into <code>quoteChar</code>, quotes and apostrophes are replaced by their entities
     * and line breaks are written as character references. For element content line breaks are kept as they
     * are.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @param quoteChar    if this is for attributes this <code>char</code> is used to quote the attribute value
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }

        int length = text.length();
        StringBuilder n = new StringBuilder( length * 2 );
        if ( forAttribute )
        {
            n.append( quoteChar );
        }

        for ( int i = 0; i < length; i++ )
        {
            char c = text.charAt( i );
            switch ( c )
            {
                case '&':
                    n.append( "&amp;" );
                    break;
                case '<':
                    n.append( "&lt;" );
                    break;
                case '>': // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append( "&gt;" );
                    break;
                case '"':
                    if ( forAttribute )
                    {
                        n.append( "&quot;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\'':
                    if ( forAttribute )
                    {
                        n.append( "&apos;" );
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\r':
                    if ( forAttribute )
                    {
                        // a lone \r is written as character reference,
                        // but the \r of a \r\n pair is skipped: the following \n represents the line break
                        if ( i == ( length - 1 ) || text.charAt( i + 1 ) != '\n' )
                        {
                            n.append( "&#13;" );
                        }
                    }
                    else
                    {
                        n.append( c );
                    }
                    break;
                case '\n':
                    if ( forAttribute )
                    {
                        n.append( "&#10;" );
                    }
                    else
                    {
                        // line feeds are legal in element content and must not get lost
                        n.append( c );
                    }
                    break;
                default:
                    n.append( c );
                    break;
            }
        }

        if ( forAttribute )
        {
            n.append( quoteChar );
        }

        return n.toString();
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped into a CDATA block, or <code>null</code> if <code>text</code> is
     *         <code>null</code> or contains the CDATA end marker
     */
    public static String xmlEncodeTextAsCDATABlock( String text )
    {
        if ( !isCompatibleWithCDATABlock( text ) )
        {
            return null;
        }
        return "<![CDATA[" + text + CDATA_END;
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     *
     * @param text the text to check, may be <code>null</code>
     * @return <code>true</code> if the text cannot be written to element content as it is
     */
    public static boolean needsEncoding( String text )
    {
        return needsEncoding( text, false );
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * <p>
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute.
     *
     * @param data         the text to check, may be <code>null</code>
     * @param checkForAttr if <code>true</code>, quotes and apostrophes are treated as characters
     *                     that need encoding, too
     * @return <code>true</code> if the text contains &amp;, &lt; or the sequence {@code ]]>}
     *         (or a quote/apostrophe when checking for attributes);
     *         <code>false</code> otherwise and for <code>null</code>
     */
    public static boolean needsEncoding( String data, boolean checkForAttr )
    {
        if ( data == null )
        {
            return false;
        }
        for ( int i = 0; i < data.length(); i++ )
        {
            char c = data.charAt( i );
            if ( c == '&' || c == '<' )
            {
                return true;
            }
            if ( checkForAttr && ( c == '"' || c == '\'' ) )
            {
                return true;
            }
        }
        // a literal "]]>" is not allowed in character data, its '>' has to be escaped
        return data.contains( CDATA_END );
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the text to check, may be <code>null</code>
     * @return <code>true</code> if the text is not <code>null</code> and does not contain the CDATA end marker
     */
    public static boolean isCompatibleWithCDATABlock( String text )
    {
        return text != null && !text.contains( CDATA_END );
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'
     * <p>
     * Only the five predefined entities (amp, lt, gt, quot, apos) are decoded. Any other ampersand,
     * including the one starting a numeric character reference, is copied unchanged.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public static String xmlDecodeTextToCDATA( String pcdata )
    {
        if ( pcdata == null )
        {
            return null;
        }
        StringBuilder n = new StringBuilder( pcdata.length() );
        for ( int i = 0; i < pcdata.length(); i++ )
        {
            char c = pcdata.charAt( i );
            if ( c != '&' )
            {
                n.append( c );
                continue;
            }

            char c1 = lookAhead( 1, i, pcdata );
            char c2 = lookAhead( 2, i, pcdata );
            char c3 = lookAhead( 3, i, pcdata );
            char c4 = lookAhead( 4, i, pcdata );
            char c5 = lookAhead( 5, i, pcdata );

            // on a match, i is advanced past the entity name and the terminating ';'
            if ( c1 == 'a' && c2 == 'm' && c3 == 'p' && c4 == ';' )
            {
                n.append( '&' );
                i += 4;
            }
            else if ( c1 == 'l' && c2 == 't' && c3 == ';' )
            {
                n.append( '<' );
                i += 3;
            }
            else if ( c1 == 'g' && c2 == 't' && c3 == ';' )
            {
                n.append( '>' );
                i += 3;
            }
            else if ( c1 == 'q' && c2 == 'u' && c3 == 'o' && c4 == 't' && c5 == ';' )
            {
                n.append( '"' );
                i += 5;
            }
            else if ( c1 == 'a' && c2 == 'p' && c3 == 'o' && c4 == 's' && c5 == ';' )
            {
                n.append( '\'' );
                i += 5;
            }
            else
            {
                // not a known entity, keep the ampersand as it is
                n.append( '&' );
            }
        }
        return n.toString();
    }

    /**
     * Returns the character <code>la</code> positions behind <code>offset</code>,
     * or <code>0x0</code> if that position is outside of <code>data</code>.
     */
    private static char lookAhead( int la, int offset, String data )
    {
        int index = offset + la;
        if ( index < 0 || index >= data.length() )
        {
            return 0x0;
        }
        return data.charAt( index );
    }

    // combine multiple checks in one method for speed
    // NOTE: currently not called from within this class
    private static boolean contains( String text, char[] chars )
    {
        if ( text == null || chars == null || chars.length == 0 )
        {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ )
        {
            char c = text.charAt( i );
            for ( char aChar : chars )
            {
                if ( aChar == c )
                {
                    return true;
                }
            }
        }
        return false;
    }

}