View Javadoc

1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedReader;
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.net.URL;
32  import java.util.Hashtable;
33  import java.util.LinkedHashMap;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  import org.apache.http.HttpEntity;
40  import org.apache.http.HttpResponse;
41  import org.apache.http.HttpStatus;
42  import org.apache.http.client.ClientProtocolException;
43  import org.apache.http.client.HttpRequestRetryHandler;
44  import org.apache.http.client.methods.HttpGet;
45  import org.apache.http.impl.client.DefaultHttpClient;
46  import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47  import org.apache.http.util.EntityUtils;
48  
49  import org.apache.maven.doxia.macro.MacroExecutionException;
50  import org.apache.maven.doxia.markup.XmlMarkup;
51  import org.apache.maven.doxia.sink.Sink;
52  import org.apache.maven.doxia.sink.SinkEventAttributeSet;
53  import org.apache.maven.doxia.util.HtmlTools;
54  import org.apache.maven.doxia.util.XmlValidator;
55  
56  import org.codehaus.plexus.util.FileUtils;
57  import org.codehaus.plexus.util.IOUtil;
58  import org.codehaus.plexus.util.StringUtils;
59  import org.codehaus.plexus.util.xml.pull.MXParser;
60  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62  
63  import org.xml.sax.EntityResolver;
64  import org.xml.sax.InputSource;
65  import org.xml.sax.SAXException;
66  
67  /**
68   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
69   *
70   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
71   * @version $Id: AbstractXmlParser.java 1185112 2011-10-17 11:33:00Z ltheussl $
72   * @since 1.0
73   */
74  public abstract class AbstractXmlParser
75      extends AbstractParser
76      implements XmlMarkup
77  {
78      /**
79       * Entity pattern for HTML entity, i.e. &#38;nbsp;
80       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
81       * <br/>
82       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
83       */
84      private static final Pattern PATTERN_ENTITY_1 =
85          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86  
87      /**
88       * Entity pattern for Unicode entity, i.e. &#38;#38;
89       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
90       * <br/>
91       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
92       */
93      private static final Pattern PATTERN_ENTITY_2 =
94          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95  
96      private boolean ignorableWhitespace;
97  
98      private boolean collapsibleWhitespace;
99  
100     private boolean trimmableWhitespace;
101 
102     private Map<String, String> entities;
103 
104     private boolean validate = false;
105 
106     /** {@inheritDoc} */
107     public void parse( Reader source, Sink sink )
108         throws ParseException
109     {
110         init();
111 
112         Reader src = source;
113 
114         // 1 first parsing if validation is required
115         if ( isValidate() )
116         {
117             String content;
118             try
119             {
120                 content = IOUtil.toString( new BufferedReader( src ) );
121             }
122             catch ( IOException e )
123             {
124                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
125             }
126 
127             new XmlValidator( getLog() ).validate( content );
128 
129             src = new StringReader( content );
130         }
131 
132         // 2 second parsing to process
133         try
134         {
135             XmlPullParser parser = new MXParser();
136 
137             parser.setInput( src );
138 
139             sink.enableLogging( getLog() );
140 
141             parseXml( parser, sink );
142         }
143         catch ( XmlPullParserException ex )
144         {
145             throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
146                                       ex.getColumnNumber() );
147         }
148         catch ( MacroExecutionException ex )
149         {
150             throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
151         }
152 
153         setSecondParsing( false );
154         init();
155     }
156 
157     /**
158      * {@inheritDoc}
159      *
160      * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
161      */
162     @Override
163     public void parse( String string, Sink sink )
164         throws ParseException
165     {
166         super.parse( string, sink );
167     }
168 
169     /** {@inheritDoc} */
170     @Override
171     public final int getType()
172     {
173         return XML_TYPE;
174     }
175 
176     /**
177      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
178      *
179      * @param parser A parser, not null.
180      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
181      * @since 1.1
182      */
183     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
184     {
185         int count = parser.getAttributeCount();
186 
187         if ( count < 0 )
188         {
189             return null;
190         }
191 
192         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
193 
194         for ( int i = 0; i < count; i++ )
195         {
196             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
197         }
198 
199         return atts;
200     }
201 
202     /**
203      * Parse the model from the XmlPullParser into the given sink.
204      *
205      * @param parser A parser, not null.
206      * @param sink the sink to receive the events.
207      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
208      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
209      */
210     private void parseXml( XmlPullParser parser, Sink sink )
211         throws XmlPullParserException, MacroExecutionException
212     {
213         int eventType = parser.getEventType();
214 
215         while ( eventType != XmlPullParser.END_DOCUMENT )
216         {
217             if ( eventType == XmlPullParser.START_TAG )
218             {
219                 handleStartTag( parser, sink );
220             }
221             else if ( eventType == XmlPullParser.END_TAG )
222             {
223                 handleEndTag( parser, sink );
224             }
225             else if ( eventType == XmlPullParser.TEXT )
226             {
227                 String text = getText( parser );
228 
229                 if ( isIgnorableWhitespace() )
230                 {
231                     if ( text.trim().length() != 0 )
232                     {
233                         handleText( parser, sink );
234                     }
235                 }
236                 else
237                 {
238                     handleText( parser, sink );
239                 }
240             }
241             else if ( eventType == XmlPullParser.CDSECT )
242             {
243                 handleCdsect( parser, sink );
244             }
245             else if ( eventType == XmlPullParser.COMMENT )
246             {
247                 handleComment( parser, sink );
248             }
249             else if ( eventType == XmlPullParser.ENTITY_REF )
250             {
251                 handleEntity( parser, sink );
252             }
253             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
254             {
255                 // nop
256             }
257             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
258             {
259                 // nop
260             }
261             else if ( eventType == XmlPullParser.DOCDECL )
262             {
263                 addLocalEntities( parser, parser.getText() );
264 
265                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
266                 {
267                     addDTDEntities( parser, new String( res ) );
268                 }
269             }
270 
271             try
272             {
273                 eventType = parser.nextToken();
274             }
275             catch ( IOException io )
276             {
277                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
278             }
279         }
280     }
281 
282     /**
283      * Goes through the possible start tags.
284      *
285      * @param parser A parser, not null.
286      * @param sink the sink to receive the events.
287      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
288      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
289      */
290     protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
291         throws XmlPullParserException, MacroExecutionException;
292 
293     /**
294      * Goes through the possible end tags.
295      *
296      * @param parser A parser, not null.
297      * @param sink the sink to receive the events.
298      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
299      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
300      */
301     protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
302         throws XmlPullParserException, MacroExecutionException;
303 
304     /**
305      * Handles text events.
306      *
307      * <p>This is a default implementation, if the parser points to a non-empty text element,
308      * it is emitted as a text event into the specified sink.</p>
309      *
310      * @param parser A parser, not null.
311      * @param sink the sink to receive the events. Not null.
312      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
313      */
314     protected void handleText( XmlPullParser parser, Sink sink )
315         throws XmlPullParserException
316     {
317         String text = getText( parser );
318 
319         /*
320          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
321          * parser so any whitespace that makes it here is significant.
322          */
323         if ( StringUtils.isNotEmpty( text ) )
324         {
325             sink.text( text );
326         }
327     }
328 
329     /**
330      * Handles CDATA sections.
331      *
332      * <p>This is a default implementation, all data are emitted as text
333      * events into the specified sink.</p>
334      *
335      * @param parser A parser, not null.
336      * @param sink the sink to receive the events. Not null.
337      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
338      */
339     protected void handleCdsect( XmlPullParser parser, Sink sink )
340         throws XmlPullParserException
341     {
342         sink.text( getText( parser ) );
343     }
344 
345     /**
346      * Handles comments.
347      *
348      * <p>This is a default implementation, all data are emitted as comment
349      * events into the specified sink.</p>
350      *
351      * @param parser A parser, not null.
352      * @param sink the sink to receive the events. Not null.
353      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354      */
355     protected void handleComment( XmlPullParser parser, Sink sink )
356         throws XmlPullParserException
357     {
358         sink.comment( getText( parser ).trim() );
359     }
360 
361     /**
362      * Handles entities.
363      *
364      * <p>This is a default implementation, all entities are resolved and emitted as text
365      * events into the specified sink, except:</p>
366      * <ul>
367      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
368      * are emitted as <code>nonBreakingSpace()</code> events.</li>
369      * </ul>
370      *
371      * @param parser A parser, not null.
372      * @param sink the sink to receive the events. Not null.
373      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
374      */
375     protected void handleEntity( XmlPullParser parser, Sink sink )
376         throws XmlPullParserException
377     {
378         String text = getText( parser );
379 
380         String name = parser.getName();
381 
382         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
383         {
384             sink.nonBreakingSpace();
385         }
386         else
387         {
388             String unescaped = HtmlTools.unescapeHTML( text );
389 
390             sink.text( unescaped );
391         }
392     }
393 
394     /**
395      * Handles an unkown event.
396      *
397      * <p>This is a default implementation, all events are emitted as unknown
398      * events into the specified sink.</p>
399      *
400      * @param parser the parser to get the event from.
401      * @param sink the sink to receive the event.
402      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
403      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
404      * It will be passed as the first argument of the required parameters to the Sink
405      * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
406      * method.
407      */
408     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
409     {
410         Object[] required = new Object[] { new Integer( type ) };
411 
412         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
413 
414         sink.unknown( parser.getName(), required, attribs );
415     }
416 
417     /**
418      * <p>isIgnorableWhitespace.</p>
419      *
420      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
421      * @see #setIgnorableWhitespace(boolean)
422      * @since 1.1
423      */
424     protected boolean isIgnorableWhitespace()
425     {
426         return ignorableWhitespace;
427     }
428 
429     /**
430      * Specify that whitespace will be ignored. I.e.:
431      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
432      * is equivalent to
433      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
434      *
435      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
436      * @since 1.1
437      */
438     protected void setIgnorableWhitespace( boolean ignorable )
439     {
440         this.ignorableWhitespace = ignorable;
441     }
442 
443     /**
444      * <p>isCollapsibleWhitespace.</p>
445      *
446      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
447      * @see #setCollapsibleWhitespace(boolean)
448      * @since 1.1
449      */
450     protected boolean isCollapsibleWhitespace()
451     {
452         return collapsibleWhitespace;
453     }
454 
455     /**
456      * Specify that text will be collapsed. I.e.:
457      * <pre>Text   Text</pre>
458      * is equivalent to
459      * <pre>Text Text</pre>
460      *
461      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
462      * @since 1.1
463      */
464     protected void setCollapsibleWhitespace( boolean collapsible )
465     {
466         this.collapsibleWhitespace = collapsible;
467     }
468 
469     /**
470      * <p>isTrimmableWhitespace.</p>
471      *
472      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
473      * @see #setTrimmableWhitespace(boolean)
474      * @since 1.1
475      */
476     protected boolean isTrimmableWhitespace()
477     {
478         return trimmableWhitespace;
479     }
480 
481     /**
482      * Specify that text will be collapsed. I.e.:
483      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
484      * is equivalent to
485      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
486      *
487      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
488      * @since 1.1
489      */
490     protected void setTrimmableWhitespace( boolean trimmable )
491     {
492         this.trimmableWhitespace = trimmable;
493     }
494 
495     /**
496      * <p>getText.</p>
497      *
498      * @param parser A parser, not null.
499      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
500      * @see XmlPullParser#getText()
501      * @see #isCollapsibleWhitespace()
502      * @see #isTrimmableWhitespace()
503      * @since 1.1
504      */
505     protected String getText( XmlPullParser parser )
506     {
507         String text = parser.getText();
508 
509         if ( isTrimmableWhitespace() )
510         {
511             text = text.trim();
512         }
513 
514         if ( isCollapsibleWhitespace() )
515         {
516             StringBuilder newText = new StringBuilder();
517             String[] elts = StringUtils.split( text, " \r\n" );
518             for ( int i = 0; i < elts.length; i++ )
519             {
520                 newText.append( elts[i] );
521                 if ( ( i + 1 ) < elts.length )
522                 {
523                     newText.append( " " );
524                 }
525             }
526             text = newText.toString();
527         }
528 
529         return text;
530     }
531 
532     /**
533      * Return the defined entities in a local doctype. I.e.:
534      * <pre>
535      * &lt;!DOCTYPE foo [
536      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
537      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
538      * ]&gt;
539      * </pre>
540      *
541      * @return a map of the defined entities in a local doctype.
542      * @since 1.1
543      */
544     protected Map<String, String> getLocalEntities()
545     {
546         if ( entities == null )
547         {
548             entities = new LinkedHashMap<String, String>();
549         }
550 
551         return entities;
552     }
553 
554     /**
555      * <p>isValidate.</p>
556      *
557      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
558      * @since 1.1
559      */
560     public boolean isValidate()
561     {
562         return validate;
563     }
564 
565     /**
566      * Specify a flag to validate or not the XML content.
567      *
568      * @param validate the validate to set
569      * @see #parse(Reader, Sink)
570      * @since 1.1
571      */
572     public void setValidate( boolean validate )
573     {
574         this.validate = validate;
575     }
576 
577     // ----------------------------------------------------------------------
578     // Private methods
579     // ----------------------------------------------------------------------
580 
581     /**
582      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
583      * <br/>
584      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
585      *
586      * @param parser not null
587      * @param entityName not null
588      * @param entityValue not null
589      * @throws XmlPullParserException if any
590      * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
591      */
592     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
593         throws XmlPullParserException
594     {
595         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
596             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
597         {
598             return;
599         }
600 
601         parser.defineEntityReplacementText( entityName, entityValue );
602         getLocalEntities().put( entityName, entityValue );
603     }
604 
605     /**
606      * Handle entities defined in a local doctype as the following:
607      * <pre>
608      * &lt;!DOCTYPE foo [
609      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
610      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
611      * ]&gt;
612      * </pre>
613      *
614      * @param parser not null
615      * @param text not null
616      * @throws XmlPullParserException if any
617      */
618     private void addLocalEntities( XmlPullParser parser, String text )
619         throws XmlPullParserException
620     {
621         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
622         if ( entitiesCount > 0 )
623         {
624             // text should be foo [...]
625             int start = text.indexOf( '[');
626             int end = text.lastIndexOf( ']');
627             if ( start != -1 && end != -1 )
628             {
629                 addDTDEntities( parser, text.substring( start + 1, end ) );
630             }
631         }
632     }
633 
634     /**
635      * Handle entities defined in external doctypes as the following:
636      * <pre>
637      * &lt;!DOCTYPE foo [
638      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
639      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
640      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
641      *   %HTMLlat1;
642      * ]&gt;
643      * </pre>
644      *
645      * @param parser not null
646      * @param text not null
647      * @throws XmlPullParserException if any
648      */
649     private void addDTDEntities( XmlPullParser parser, String text )
650         throws XmlPullParserException
651     {
652         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
653         if ( entitiesCount > 0 )
654         {
655             final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
656             BufferedReader reader = new BufferedReader( new StringReader( txt ) );
657             String line;
658             String tmpLine = "";
659             try
660             {
661                 Matcher matcher;
662                 while ( ( line = reader.readLine() ) != null )
663                 {
664                     tmpLine += "\n" + line;
665                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
666                     if ( matcher.find() && matcher.groupCount() == 7 )
667                     {
668                         String entityName = matcher.group( 2 );
669                         String entityValue = matcher.group( 5 );
670 
671                         addEntity( parser, entityName, entityValue );
672                         tmpLine = "";
673                     }
674                     else
675                     {
676                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
677                         if ( matcher.find() && matcher.groupCount() == 8 )
678                         {
679                             String entityName = matcher.group( 2 );
680                             String entityValue = matcher.group( 5 );
681 
682                             addEntity( parser, entityName, entityValue );
683                             tmpLine = "";
684                         }
685                     }
686                 }
687             }
688             catch ( IOException e )
689             {
690                 // nop
691             }
692             finally
693             {
694                 IOUtil.close( reader );
695             }
696         }
697     }
698 
699     /**
700      * Implementation of the callback mechanism <code>EntityResolver</code>.
701      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
702      */
703     public static class CachedFileEntityResolver
704         implements EntityResolver
705     {
706         /** Map with systemId as key and the content of systemId as byte[]. */
707         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
708 
709         /** {@inheritDoc} */
710         public InputSource resolveEntity( String publicId, String systemId )
711             throws SAXException, IOException
712         {
713             byte[] res = ENTITY_CACHE.get( systemId );
714             // already cached?
715             if ( res == null )
716             {
717                 String systemName = FileUtils.getFile( systemId ).getName();
718                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
719                 // maybe already as a temp file?
720                 if ( !temp.exists() )
721                 {
722                     // is systemId a file or an url?
723                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
724                     {
725                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
726                         // the classpath...
727                         String resource = "/" + systemName;
728                         URL url = getClass().getResource( resource );
729                         if ( url != null )
730                         {
731                             res = toByteArray( url );
732                         }
733                         else
734                         {
735                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
736                             + " because '" + resource + "' is not available of the classpath." );
737                         }
738                     }
739                     else
740                     {
741                         res = toByteArray( new URL( systemId ) );
742                     }
743 
744                     // write systemId as temp file
745                     copy( res, temp );
746                 }
747                 else
748                 {
749                     // TODO How to refresh Doxia XSDs from temp dir?
750                     res = toByteArray( temp.toURI().toURL() );
751                 }
752 
753                 ENTITY_CACHE.put( systemId, res );
754             }
755 
756             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
757             is.setPublicId( publicId );
758             is.setSystemId( systemId );
759 
760             return is;
761         }
762 
763         /**
764          * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
765          * content.
766          * Otherwise, use HttpClient to get the http content.
767          * Wrap all internal exceptions to throw SAXException.
768          *
769          * @param url not null
770          * @return return an array of byte
771          * @throws SAXException if any
772          */
773         private static byte[] toByteArray( URL url )
774             throws SAXException
775         {
776             if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
777             {
778                 InputStream is = null;
779                 try
780                 {
781                     is = url.openStream();
782                     if ( is == null )
783                     {
784                         throw new SAXException( "Cannot open stream from the url: " + url.toString() );
785                     }
786                     return IOUtil.toByteArray( is );
787                 }
788                 catch ( IOException e )
789                 {
790                     throw new SAXException( "IOException: " + e.getMessage(), e );
791                 }
792                 finally
793                 {
794                     IOUtil.close( is );
795                 }
796             }
797 
798             // it is an HTTP url, using HttpClient...
799             DefaultHttpClient client = new DefaultHttpClient();
800             HttpGet method = new HttpGet( url.toString() );
801             // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
802             // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
803             method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
804 
805             HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
806             client.setHttpRequestRetryHandler( retryHandler );
807 
808             HttpEntity entity = null;
809             try
810             {
811                 HttpResponse response = client.execute( method );
812                 int statusCode = response.getStatusLine().getStatusCode();
813                 if ( statusCode != HttpStatus.SC_OK )
814                 {
815                     throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
816                         + statusCode + ", which is not allowed. The server gave this reason for the failure '"
817                         + response.getStatusLine().getReasonPhrase() + "'." );
818                 }
819 
820                 entity = response.getEntity();
821                 return EntityUtils.toByteArray( entity );
822             }
823             catch ( ClientProtocolException e )
824             {
825                 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
826             }
827             catch ( IOException e )
828             {
829                 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
830             }
831             finally
832             {
833                 if ( entity != null )
834                 {
835                     try
836                     {
837                         entity.consumeContent();
838                     }
839                     catch ( IOException e )
840                     {
841                         // Ignore
842                     }
843                 }
844             }
845         }
846 
847         /**
848          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
849          *
850          * @param res not null array of byte
851          * @param f the file where to write the bytes
852          * @throws SAXException if any
853          * @see {@link IOUtil#copy(byte[], OutputStream)}
854          */
855         private void copy( byte[] res, File f )
856             throws SAXException
857         {
858             if ( f.isDirectory() )
859             {
860                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
861             }
862 
863             OutputStream os = null;
864             try
865             {
866                 os = new FileOutputStream( f );
867                 IOUtil.copy( res, os );
868             }
869             catch ( IOException e )
870             {
871                 throw new SAXException( "IOException: " + e.getMessage(), e );
872             }
873             finally
874             {
875                 IOUtil.close( os );
876             }
877         }
878     }
879 }