View Javadoc
1   package org.apache.maven.shared.utils.xml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.StringWriter;
24  import java.io.Writer;
25  
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters &amp; and &lt;, and for attributes
 * &quot; and &apos;. These must be encoded/decoded from/to XML.
 * <p>
 * All methods are static; the class is not meant to be instantiated.
 */
final class XMLEncode
{

    /** Texts that are not longer than this are never wrapped into a CDATA block (the wrapper itself costs 12 chars). */
    private static final int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    private static final char DEFAULT_QUOTE_CHAR = '"';

    private static final String CDATA_START = "<![CDATA[";

    private static final String CDATA_END = "]]>";

    /** The predefined entity references understood by {@link #xmlDecodeTextToCDATA(String)}. */
    private static final String[] ENTITY_REFERENCES = { "&amp;", "&lt;", "&gt;", "&quot;", "&apos;" };

    /** The characters the entries of {@link #ENTITY_REFERENCES} stand for, index by index. */
    private static final char[] ENTITY_CHARACTERS = { '&', '<', '>', '"', '\'' };

    private XMLEncode()
    {
        // static utility class
    }

    /**
     * Checks if this text purely consists of white space characters, as defined by
     * {@link Character#isWhitespace(char)}.
     *
     * @param text the text to check, must not be <code>null</code>
     * @return <code>true</code> if every character is white space (also for the empty string)
     * @throws NullPointerException if <code>text</code> is <code>null</code>
     */
    public static boolean isWhiteSpace( String text )
    {
        for ( int i = 0; i < text.length(); i++ )
        {
            if ( !Character.isWhitespace( text.charAt( i ) ) )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes. The result is already surrounded by <code>quoteChar</code>.
     *
     * @param text      the raw attribute value, may be <code>null</code>
     * @param quoteChar the character used to quote the attribute value
     * @return the quoted and encoded value, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextForAttribute( String text, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, true, quoteChar );
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeText( String text )
    {
        if ( text == null )
        {
            return null;
        }
        StringWriter writer = new StringWriter( text.length() * 2 );
        xmlEncodeText( text, writer );
        return writer.toString();
    }

    /**
     * Encodes text as XML in the most suitable way and writes the result to <code>writer</code>:
     * text that needs no encoding is written as is, longer text is wrapped into a CDATA block if it
     * is compatible with one, everything else is written as PCDATA.
     *
     * @param text   the raw text; nothing is written if it is <code>null</code>
     * @param writer the target to write to
     * @throws RuntimeException wrapping any {@link IOException} raised by <code>writer</code>
     */
    public static void xmlEncodeText( String text, Writer writer )
    {
        if ( text == null )
        {
            return;
        }
        try
        {
            if ( !needsEncoding( text ) )
            {
                writer.write( text );
                return;
            }
            // only encode as CDATA if the text is longer than the CDATA block overhead
            if ( text.length() > CDATA_BLOCK_THRESHOLD_LENGTH )
            {
                String cdata = xmlEncodeTextAsCDATABlock( text );
                if ( cdata != null )
                {
                    writer.write( cdata );
                    return;
                }
            }
        }
        catch ( IOException e )
        {
            throw new RuntimeException( e );
        }
        // if everything else fails, do it the safe way...
        xmlEncodeTextAsPCDATA( text, false, DEFAULT_QUOTE_CHAR, writer );
    }

    /**
     * Encodes any text as PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text )
    {
        if ( text == null )
        {
            return null;
        }
        return xmlEncodeTextAsPCDATA( text, false );
    }

    /**
     * Encodes any text as PCDATA.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute )
    {
        return xmlEncodeTextAsPCDATA( text, forAttribute, DEFAULT_QUOTE_CHAR );
    }

    /**
     * Encodes any text as PCDATA.
     *
     * @param text         the raw text, may be <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @param quoteChar    if this is for attributes this <code>char</code> is used to quote the attribute value
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA( String text, boolean forAttribute, char quoteChar )
    {
        if ( text == null )
        {
            return null;
        }
        StringWriter writer = new StringWriter( text.length() * 2 );
        xmlEncodeTextAsPCDATA( text, forAttribute, quoteChar, writer );
        return writer.toString();
    }

    /**
     * Encodes any text as PCDATA and writes the result to the given writer.
     * <p>
     * <code>&amp;</code>, <code>&lt;</code> and <code>&gt;</code> are always replaced by their entity
     * references. For attributes the value is additionally surrounded by <code>quoteChar</code>, quotes
     * and apostrophes are replaced by entity references and line breaks by character references
     * (a <code>\r\n</code> pair becomes a single <code>&amp;#10;</code>). For element content line
     * breaks are written unchanged.
     *
     * @param text         the raw text; nothing is written if it is <code>null</code>
     * @param forAttribute if you want
     *                     quotes and apostrophes specially treated for attributes
     * @param quoteChar    if this is for attributes this <code>char</code> is used to quote the attribute value
     * @param n            the target to write to
     * @throws RuntimeException wrapping any {@link IOException} raised by the writer
     */
    public static void xmlEncodeTextAsPCDATA( String text, boolean forAttribute, char quoteChar, Writer n )
    {
        if ( text == null )
        {
            return;
        }
        try
        {
            int length = text.length();
            if ( forAttribute )
            {
                n.append( quoteChar );
            }

            for ( int i = 0; i < length; i++ )
            {
                char c = text.charAt( i );
                switch ( c )
                {
                    case '&':
                        n.append( "&amp;" );
                        break;
                    case '<':
                        n.append( "&lt;" );
                        break;
                    case '>': // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                        n.append( "&gt;" );
                        break;
                    case '"':
                        if ( forAttribute )
                        {
                            n.append( "&quot;" );
                        }
                        else
                        {
                            n.append( c );
                        }
                        break;
                    case '\'':
                        if ( forAttribute )
                        {
                            n.append( "&apos;" );
                        }
                        else
                        {
                            n.append( c );
                        }
                        break;
                    case '\r':
                        if ( forAttribute )
                        {
                            // a lone \r is kept as character reference, the \r of a \r\n pair is skipped
                            if ( i == ( length - 1 ) || text.charAt( i + 1 ) != '\n' )
                            {
                                n.append( "&#13;" );
                            }
                        }
                        else
                        {
                            n.append( c );
                        }
                        break;
                    case '\n':
                        if ( forAttribute )
                        {
                            n.append( "&#10;" );
                        }
                        else
                        {
                            // element content: the line break must survive, it used to be dropped here
                            n.append( c );
                        }
                        break;
                    default:
                        n.append( c );
                        break;
                }
            }

            if ( forAttribute )
            {
                n.append( quoteChar );
            }
        }
        catch ( IOException e )
        {
            throw new RuntimeException( e );
        }
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped into a CDATA block, or <code>null</code> if <code>text</code> is
     *         <code>null</code> or contains the CDATA end marker
     */
    public static String xmlEncodeTextAsCDATABlock( String text )
    {
        if ( !isCompatibleWithCDATABlock( text ) )
        {
            return null;
        }
        return CDATA_START + text + CDATA_END;
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML element content.
     *
     * @param text the text to check, may be <code>null</code>
     * @return <code>true</code> if the text cannot be written as is
     */
    public static boolean needsEncoding( String text )
    {
        return needsEncoding( text, false );
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * <p>
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute. Otherwise the check is done for element content, where in addition to
     * <code>&amp;</code> and <code>&lt;</code> the sequence <code>]]&gt;</code> is not allowed.
     *
     * @param data         the text to check, may be <code>null</code>
     * @param checkForAttr <code>true</code> to also treat quotes and apostrophes as special
     * @return <code>true</code> if the text cannot be written as is, <code>false</code> otherwise
     *         and for <code>null</code>
     */
    public static boolean needsEncoding( String data, boolean checkForAttr )
    {
        if ( data == null )
        {
            return false;
        }
        for ( int i = 0; i < data.length(); i++ )
        {
            char c = data.charAt( i );
            if ( c == '&' || c == '<' )
            {
                return true;
            }
            if ( checkForAttr )
            {
                if ( c == '"' || c == '\'' )
                {
                    return true;
                }
            }
            else if ( c == '>' && i >= 2 && data.charAt( i - 1 ) == ']' && data.charAt( i - 2 ) == ']' )
            {
                // "]]>" must not appear literally in character data
                return true;
            }
        }
        return false;
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the text to check, may be <code>null</code>
     * @return <code>true</code> if the text is not <code>null</code> and does not contain the CDATA end marker
     */
    public static boolean isCompatibleWithCDATABlock( String text )
    {
        return text != null && !text.contains( CDATA_END );
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'
     * <p>
     * Only the five predefined entity references are decoded; any other <code>&amp;</code>,
     * including the one of a numeric character reference, is copied unchanged.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public static String xmlDecodeTextToCDATA( String pcdata )
    {
        if ( pcdata == null )
        {
            return null;
        }
        int length = pcdata.length();
        StringBuilder n = new StringBuilder( length );
        int i = 0;
        while ( i < length )
        {
            char c = pcdata.charAt( i );
            int entity = c == '&' ? indexOfEntityAt( pcdata, i ) : -1;
            if ( entity >= 0 )
            {
                n.append( ENTITY_CHARACTERS[entity] );
                i += ENTITY_REFERENCES[entity].length();
            }
            else
            {
                n.append( c );
                i++;
            }
        }
        return n.toString();
    }

    /**
     * Finds the predefined entity reference that starts at the given offset.
     *
     * @return the index into {@link #ENTITY_REFERENCES}, or -1 if none starts at <code>offset</code>
     */
    private static int indexOfEntityAt( String data, int offset )
    {
        for ( int k = 0; k < ENTITY_REFERENCES.length; k++ )
        {
            if ( data.startsWith( ENTITY_REFERENCES[k], offset ) )
            {
                return k;
            }
        }
        return -1;
    }

}