View Javadoc

1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedReader;
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.net.URL;
32  import java.util.Hashtable;
33  import java.util.LinkedHashMap;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  import org.apache.http.HttpEntity;
40  import org.apache.http.HttpResponse;
41  import org.apache.http.HttpStatus;
42  import org.apache.http.client.ClientProtocolException;
43  import org.apache.http.client.HttpRequestRetryHandler;
44  import org.apache.http.client.methods.HttpGet;
45  import org.apache.http.impl.client.DefaultHttpClient;
46  import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47  import org.apache.http.util.EntityUtils;
48  
49  import org.apache.maven.doxia.macro.MacroExecutionException;
50  import org.apache.maven.doxia.markup.XmlMarkup;
51  import org.apache.maven.doxia.sink.Sink;
52  import org.apache.maven.doxia.sink.SinkEventAttributeSet;
53  import org.apache.maven.doxia.util.HtmlTools;
54  import org.apache.maven.doxia.util.XmlValidator;
55  
56  import org.codehaus.plexus.util.FileUtils;
57  import org.codehaus.plexus.util.IOUtil;
58  import org.codehaus.plexus.util.StringUtils;
59  import org.codehaus.plexus.util.xml.pull.MXParser;
60  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62  
63  import org.xml.sax.EntityResolver;
64  import org.xml.sax.InputSource;
65  import org.xml.sax.SAXException;
66  
67  /**
68   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
69   *
70   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
71   * @version $Id: AbstractXmlParser.java 1094965 2011-04-19 09:30:13Z ltheussl $
72   * @since 1.0
73   */
74  public abstract class AbstractXmlParser
75      extends AbstractParser
76      implements XmlMarkup
77  {
78      /**
79       * Entity pattern for HTML entity, i.e. &#38;nbsp;
80       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
81       * <br/>
82       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
83       */
84      private static final Pattern PATTERN_ENTITY_1 =
85          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86  
87      /**
88       * Entity pattern for Unicode entity, i.e. &#38;#38;
89       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
90       * <br/>
91       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
92       */
93      private static final Pattern PATTERN_ENTITY_2 =
94          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95  
96      private boolean ignorableWhitespace;
97  
98      private boolean collapsibleWhitespace;
99  
100     private boolean trimmableWhitespace;
101 
102     private Map<String, String> entities;
103 
104     private boolean validate = false;
105 
106     /** {@inheritDoc} */
107     public void parse( Reader source, Sink sink )
108         throws ParseException
109     {
110         init();
111 
112         // 1 first parsing if validation is required
113         if ( isValidate() )
114         {
115             String content;
116             try
117             {
118                 content = IOUtil.toString( new BufferedReader( source ) );
119             }
120             catch ( IOException e )
121             {
122                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
123             }
124 
125             new XmlValidator( getLog() ).validate( content );
126 
127             source = new StringReader( content );
128         }
129 
130         // 2 second parsing to process
131         try
132         {
133             XmlPullParser parser = new MXParser();
134 
135             parser.setInput( source );
136 
137             sink.enableLogging( getLog() );
138 
139             parseXml( parser, sink );
140         }
141         catch ( XmlPullParserException ex )
142         {
143             throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
144                                       ex.getColumnNumber() );
145         }
146         catch ( MacroExecutionException ex )
147         {
148             throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
149         }
150 
151         setSecondParsing( false );
152         init();
153     }
154 
155     /**
156      * {@inheritDoc}
157      *
158      * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
159      */
160     public void parse( String string, Sink sink )
161         throws ParseException
162     {
163         super.parse( string, sink );
164     }
165 
166     /** {@inheritDoc} */
167     public final int getType()
168     {
169         return XML_TYPE;
170     }
171 
172     /**
173      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
174      *
175      * @param parser A parser, not null.
176      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
177      * @since 1.1
178      */
179     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
180     {
181         int count = parser.getAttributeCount();
182 
183         if ( count < 0 )
184         {
185             return null;
186         }
187 
188         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
189 
190         for ( int i = 0; i < count; i++ )
191         {
192             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
193         }
194 
195         return atts;
196     }
197 
198     /**
199      * Parse the model from the XmlPullParser into the given sink.
200      *
201      * @param parser A parser, not null.
202      * @param sink the sink to receive the events.
203      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
204      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
205      */
206     private void parseXml( XmlPullParser parser, Sink sink )
207         throws XmlPullParserException, MacroExecutionException
208     {
209         int eventType = parser.getEventType();
210 
211         while ( eventType != XmlPullParser.END_DOCUMENT )
212         {
213             if ( eventType == XmlPullParser.START_TAG )
214             {
215                 handleStartTag( parser, sink );
216             }
217             else if ( eventType == XmlPullParser.END_TAG )
218             {
219                 handleEndTag( parser, sink );
220             }
221             else if ( eventType == XmlPullParser.TEXT )
222             {
223                 String text = getText( parser );
224 
225                 if ( isIgnorableWhitespace() )
226                 {
227                     if ( !text.trim().equals( "" ) )
228                     {
229                         handleText( parser, sink );
230                     }
231                 }
232                 else
233                 {
234                     handleText( parser, sink );
235                 }
236             }
237             else if ( eventType == XmlPullParser.CDSECT )
238             {
239                 handleCdsect( parser, sink );
240             }
241             else if ( eventType == XmlPullParser.COMMENT )
242             {
243                 handleComment( parser, sink );
244             }
245             else if ( eventType == XmlPullParser.ENTITY_REF )
246             {
247                 handleEntity( parser, sink );
248             }
249             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
250             {
251                 // nop
252             }
253             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
254             {
255                 // nop
256             }
257             else if ( eventType == XmlPullParser.DOCDECL )
258             {
259                 addLocalEntities( parser, parser.getText() );
260 
261                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
262                 {
263                     addDTDEntities( parser, new String( res ) );
264                 }
265             }
266 
267             try
268             {
269                 eventType = parser.nextToken();
270             }
271             catch ( IOException io )
272             {
273                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
274             }
275         }
276     }
277 
278     /**
279      * Goes through the possible start tags.
280      *
281      * @param parser A parser, not null.
282      * @param sink the sink to receive the events.
283      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
284      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
285      */
286     protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
287         throws XmlPullParserException, MacroExecutionException;
288 
289     /**
290      * Goes through the possible end tags.
291      *
292      * @param parser A parser, not null.
293      * @param sink the sink to receive the events.
294      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
295      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
296      */
297     protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
298         throws XmlPullParserException, MacroExecutionException;
299 
300     /**
301      * Handles text events.
302      *
303      * <p>This is a default implementation, if the parser points to a non-empty text element,
304      * it is emitted as a text event into the specified sink.</p>
305      *
306      * @param parser A parser, not null.
307      * @param sink the sink to receive the events. Not null.
308      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
309      */
310     protected void handleText( XmlPullParser parser, Sink sink )
311         throws XmlPullParserException
312     {
313         String text = getText( parser );
314 
315         /*
316          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
317          * parser so any whitespace that makes it here is significant.
318          */
319         if ( StringUtils.isNotEmpty( text ) )
320         {
321             sink.text( text );
322         }
323     }
324 
325     /**
326      * Handles CDATA sections.
327      *
328      * <p>This is a default implementation, all data are emitted as text
329      * events into the specified sink.</p>
330      *
331      * @param parser A parser, not null.
332      * @param sink the sink to receive the events. Not null.
333      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
334      */
335     protected void handleCdsect( XmlPullParser parser, Sink sink )
336         throws XmlPullParserException
337     {
338         sink.text( getText( parser ) );
339     }
340 
341     /**
342      * Handles comments.
343      *
344      * <p>This is a default implementation, all data are emitted as comment
345      * events into the specified sink.</p>
346      *
347      * @param parser A parser, not null.
348      * @param sink the sink to receive the events. Not null.
349      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
350      */
351     protected void handleComment( XmlPullParser parser, Sink sink )
352         throws XmlPullParserException
353     {
354         sink.comment( getText( parser ).trim() );
355     }
356 
357     /**
358      * Handles entities.
359      *
360      * <p>This is a default implementation, all entities are resolved and emitted as text
361      * events into the specified sink, except:</p>
362      * <ul>
363      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
364      * are emitted as <code>nonBreakingSpace()</code> events.</li>
365      * </ul>
366      *
367      * @param parser A parser, not null.
368      * @param sink the sink to receive the events. Not null.
369      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
370      */
371     protected void handleEntity( XmlPullParser parser, Sink sink )
372         throws XmlPullParserException
373     {
374         String text = getText( parser );
375 
376         String name = parser.getName();
377 
378         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
379         {
380             sink.nonBreakingSpace();
381         }
382         else
383         {
384             String unescaped = HtmlTools.unescapeHTML( text );
385 
386             sink.text( unescaped );
387         }
388     }
389 
390     /**
391      * Handles an unkown event.
392      *
393      * <p>This is a default implementation, all events are emitted as unknown
394      * events into the specified sink.</p>
395      *
396      * @param parser the parser to get the event from.
397      * @param sink the sink to receive the event.
398      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
399      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
400      * It will be passed as the first argument of the required parameters to the Sink
401      * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
402      * method.
403      */
404     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
405     {
406         Object[] required = new Object[] { new Integer( type ) };
407 
408         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
409 
410         sink.unknown( parser.getName(), required, attribs );
411     }
412 
413     /**
414      * <p>isIgnorableWhitespace.</p>
415      *
416      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
417      * @see #setIgnorableWhitespace(boolean)
418      * @since 1.1
419      */
420     protected boolean isIgnorableWhitespace()
421     {
422         return ignorableWhitespace;
423     }
424 
425     /**
426      * Specify that whitespace will be ignored. I.e.:
427      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
428      * is equivalent to
429      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
430      *
431      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
432      * @since 1.1
433      */
434     protected void setIgnorableWhitespace( boolean ignorable )
435     {
436         this.ignorableWhitespace = ignorable;
437     }
438 
439     /**
440      * <p>isCollapsibleWhitespace.</p>
441      *
442      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
443      * @see #setCollapsibleWhitespace(boolean)
444      * @since 1.1
445      */
446     protected boolean isCollapsibleWhitespace()
447     {
448         return collapsibleWhitespace;
449     }
450 
451     /**
452      * Specify that text will be collapsed. I.e.:
453      * <pre>Text   Text</pre>
454      * is equivalent to
455      * <pre>Text Text</pre>
456      *
457      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
458      * @since 1.1
459      */
460     protected void setCollapsibleWhitespace( boolean collapsible )
461     {
462         this.collapsibleWhitespace = collapsible;
463     }
464 
465     /**
466      * <p>isTrimmableWhitespace.</p>
467      *
468      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
469      * @see #setTrimmableWhitespace(boolean)
470      * @since 1.1
471      */
472     protected boolean isTrimmableWhitespace()
473     {
474         return trimmableWhitespace;
475     }
476 
477     /**
478      * Specify that text will be collapsed. I.e.:
479      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
480      * is equivalent to
481      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
482      *
483      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
484      * @since 1.1
485      */
486     protected void setTrimmableWhitespace( boolean trimmable )
487     {
488         this.trimmableWhitespace = trimmable;
489     }
490 
491     /**
492      * <p>getText.</p>
493      *
494      * @param parser A parser, not null.
495      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
496      * @see XmlPullParser#getText()
497      * @see #isCollapsibleWhitespace()
498      * @see #isTrimmableWhitespace()
499      * @since 1.1
500      */
501     protected String getText( XmlPullParser parser )
502     {
503         String text = parser.getText();
504 
505         if ( isTrimmableWhitespace() )
506         {
507             text = text.trim();
508         }
509 
510         if ( isCollapsibleWhitespace() )
511         {
512             StringBuffer newText = new StringBuffer();
513             String[] elts = StringUtils.split( text, " \r\n" );
514             for ( int i = 0; i < elts.length; i++ )
515             {
516                 newText.append( elts[i] );
517                 if ( ( i + 1 ) < elts.length )
518                 {
519                     newText.append( " " );
520                 }
521             }
522             text = newText.toString();
523         }
524 
525         return text;
526     }
527 
528     /**
529      * Return the defined entities in a local doctype. I.e.:
530      * <pre>
531      * &lt;!DOCTYPE foo [
532      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
533      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
534      * ]&gt;
535      * </pre>
536      *
537      * @return a map of the defined entities in a local doctype.
538      * @since 1.1
539      */
540     protected Map<String, String> getLocalEntities()
541     {
542         if ( entities == null )
543         {
544             entities = new LinkedHashMap<String, String>();
545         }
546 
547         return entities;
548     }
549 
550     /**
551      * <p>isValidate.</p>
552      *
553      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
554      * @since 1.1
555      */
556     public boolean isValidate()
557     {
558         return validate;
559     }
560 
561     /**
562      * Specify a flag to validate or not the XML content.
563      *
564      * @param validate the validate to set
565      * @see #parse(Reader, Sink)
566      * @since 1.1
567      */
568     public void setValidate( boolean validate )
569     {
570         this.validate = validate;
571     }
572 
573     // ----------------------------------------------------------------------
574     // Private methods
575     // ----------------------------------------------------------------------
576 
577     /**
578      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
579      * <br/>
580      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
581      *
582      * @param parser not null
583      * @param entityName not null
584      * @param entityValue not null
585      * @throws XmlPullParserException if any
586      * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
587      */
588     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
589         throws XmlPullParserException
590     {
591         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
592             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
593         {
594             return;
595         }
596 
597         parser.defineEntityReplacementText( entityName, entityValue );
598         getLocalEntities().put( entityName, entityValue );
599     }
600 
601     /**
602      * Handle entities defined in a local doctype as the following:
603      * <pre>
604      * &lt;!DOCTYPE foo [
605      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
606      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
607      * ]&gt;
608      * </pre>
609      *
610      * @param parser not null
611      * @param text not null
612      * @throws XmlPullParserException if any
613      */
614     private void addLocalEntities( XmlPullParser parser, String text )
615         throws XmlPullParserException
616     {
617         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
618         if ( entitiesCount > 0 )
619         {
620             // text should be foo [...]
621             int start = text.indexOf( "[" );
622             int end = text.lastIndexOf( "]" );
623             if ( start != -1 && end != -1 )
624             {
625                 text = text.substring( start + 1, end );
626                 addDTDEntities( parser, text );
627             }
628         }
629     }
630 
631     /**
632      * Handle entities defined in external doctypes as the following:
633      * <pre>
634      * &lt;!DOCTYPE foo [
635      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
636      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
637      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
638      *   %HTMLlat1;
639      * ]&gt;
640      * </pre>
641      *
642      * @param parser not null
643      * @param text not null
644      * @throws XmlPullParserException if any
645      */
646     private void addDTDEntities( XmlPullParser parser, String text )
647         throws XmlPullParserException
648     {
649         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
650         if ( entitiesCount > 0 )
651         {
652             text = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
653             BufferedReader reader = new BufferedReader( new StringReader( text ) );
654             String line;
655             String tmpLine = "";
656             try
657             {
658                 Matcher matcher;
659                 while ( ( line = reader.readLine() ) != null )
660                 {
661                     tmpLine += "\n" + line;
662                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
663                     if ( matcher.find() && matcher.groupCount() == 7 )
664                     {
665                         String entityName = matcher.group( 2 );
666                         String entityValue = matcher.group( 5 );
667 
668                         addEntity( parser, entityName, entityValue );
669                         tmpLine = "";
670                     }
671                     else
672                     {
673                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
674                         if ( matcher.find() && matcher.groupCount() == 8 )
675                         {
676                             String entityName = matcher.group( 2 );
677                             String entityValue = matcher.group( 5 );
678 
679                             addEntity( parser, entityName, entityValue );
680                             tmpLine = "";
681                         }
682                     }
683                 }
684             }
685             catch ( IOException e )
686             {
687                 // nop
688             }
689             finally
690             {
691                 IOUtil.close( reader );
692             }
693         }
694     }
695 
696     /**
697      * Implementation of the callback mechanism <code>EntityResolver</code>.
698      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
699      */
700     public static class CachedFileEntityResolver
701         implements EntityResolver
702     {
703         /** Map with systemId as key and the content of systemId as byte[]. */
704         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
705 
706         /** {@inheritDoc} */
707         public InputSource resolveEntity( String publicId, String systemId )
708             throws SAXException, IOException
709         {
710             byte[] res = ENTITY_CACHE.get( systemId );
711             // already cached?
712             if ( res == null )
713             {
714                 String systemName = FileUtils.getFile( systemId ).getName();
715                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
716                 // maybe already as a temp file?
717                 if ( !temp.exists() )
718                 {
719                     // is systemId a file or an url?
720                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
721                     {
722                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
723                         // the classpath...
724                         String resource = "/" + systemName;
725                         URL url = getClass().getResource( resource );
726                         if ( url != null )
727                         {
728                             res = toByteArray( url );
729                         }
730                         else
731                         {
732                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
733                             + " because '" + resource + "' is not available of the classpath." );
734                         }
735                     }
736                     else
737                     {
738                         res = toByteArray( new URL( systemId ) );
739                     }
740 
741                     // write systemId as temp file
742                     copy( res, temp );
743                 }
744                 else
745                 {
746                     // TODO How to refresh Doxia XSDs from temp dir?
747                     res = toByteArray( temp.toURI().toURL() );
748                 }
749 
750                 ENTITY_CACHE.put( systemId, res );
751             }
752 
753             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
754             is.setPublicId( publicId );
755             is.setSystemId( systemId );
756 
757             return is;
758         }
759 
760         /**
761          * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
762          * content.
763          * Otherwise, use HttpClient to get the http content.
764          * Wrap all internal exceptions to throw SAXException.
765          *
766          * @param url not null
767          * @return return an array of byte
768          * @throws SAXException if any
769          */
770         private static byte[] toByteArray( URL url )
771             throws SAXException
772         {
773             if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
774             {
775                 InputStream is = null;
776                 try
777                 {
778                     is = url.openStream();
779                     if ( is == null )
780                     {
781                         throw new SAXException( "Cannot open stream from the url: " + url.toString() );
782                     }
783                     return IOUtil.toByteArray( is );
784                 }
785                 catch ( IOException e )
786                 {
787                     throw new SAXException( "IOException: " + e.getMessage(), e );
788                 }
789                 finally
790                 {
791                     IOUtil.close( is );
792                 }
793             }
794 
795             // it is an HTTP url, using HttpClient...
796             DefaultHttpClient client = new DefaultHttpClient();
797             HttpGet method = new HttpGet( url.toString() );
798             // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
799             // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
800             method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
801 
802             HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
803             client.setHttpRequestRetryHandler( retryHandler );
804 
805             HttpEntity entity = null;
806             try
807             {
808                 HttpResponse response = client.execute( method );
809                 int statusCode = response.getStatusLine().getStatusCode();
810                 if ( statusCode != HttpStatus.SC_OK )
811                 {
812                     throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
813                         + statusCode + ", which is not allowed. The server gave this reason for the failure '"
814                         + response.getStatusLine().getReasonPhrase() + "'." );
815                 }
816 
817                 entity = response.getEntity();
818                 return EntityUtils.toByteArray( entity );
819             }
820             catch ( ClientProtocolException e )
821             {
822                 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
823             }
824             catch ( IOException e )
825             {
826                 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
827             }
828             finally
829             {
830                 if ( entity != null )
831                 {
832                     try
833                     {
834                         entity.consumeContent();
835                     }
836                     catch ( IOException e )
837                     {
838                         // Ignore
839                     }
840                 }
841             }
842         }
843 
844         /**
845          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
846          *
847          * @param res not null array of byte
848          * @param f the file where to write the bytes
849          * @throws SAXException if any
850          * @see {@link IOUtil#copy(byte[], OutputStream)}
851          */
852         private void copy( byte[] res, File f )
853             throws SAXException
854         {
855             if ( f.isDirectory() )
856             {
857                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
858             }
859 
860             OutputStream os = null;
861             try
862             {
863                 os = new FileOutputStream( f );
864                 IOUtil.copy( res, os );
865             }
866             catch ( IOException e )
867             {
868                 throw new SAXException( "IOException: " + e.getMessage(), e );
869             }
870             finally
871             {
872                 IOUtil.close( os );
873             }
874         }
875     }
876 }