View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedReader;
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.net.URL;
32  import java.util.Hashtable;
33  import java.util.LinkedHashMap;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  import org.apache.http.HttpEntity;
40  import org.apache.http.HttpResponse;
41  import org.apache.http.HttpStatus;
42  import org.apache.http.client.ClientProtocolException;
43  import org.apache.http.client.HttpRequestRetryHandler;
44  import org.apache.http.client.methods.HttpGet;
45  import org.apache.http.impl.client.DefaultHttpClient;
46  import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47  import org.apache.http.util.EntityUtils;
48  
49  import org.apache.maven.doxia.macro.MacroExecutionException;
50  import org.apache.maven.doxia.markup.XmlMarkup;
51  import org.apache.maven.doxia.sink.Sink;
52  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
53  import org.apache.maven.doxia.util.HtmlTools;
54  import org.apache.maven.doxia.util.XmlValidator;
55  
56  import org.codehaus.plexus.util.FileUtils;
57  import org.codehaus.plexus.util.IOUtil;
58  import org.codehaus.plexus.util.StringUtils;
59  import org.codehaus.plexus.util.xml.pull.MXParser;
60  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62  
63  import org.xml.sax.EntityResolver;
64  import org.xml.sax.InputSource;
65  import org.xml.sax.SAXException;
66  
67  /**
68   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
69   *
70   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
71   * @version $Id: AbstractXmlParser.java 1726411 2016-01-23 16:34:09Z hboutemy $
72   * @since 1.0
73   */
74  public abstract class AbstractXmlParser
75      extends AbstractParser
76      implements XmlMarkup
77  {
78      /**
79       * Entity pattern for HTML entity, i.e. &#38;nbsp;
80       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
81       * <br/>
82       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
83       */
84      private static final Pattern PATTERN_ENTITY_1 =
85          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86  
87      /**
88       * Entity pattern for Unicode entity, i.e. &#38;#38;
89       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
90       * <br/>
91       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
92       */
93      private static final Pattern PATTERN_ENTITY_2 =
94          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95  
96      private boolean ignorableWhitespace;
97  
98      private boolean collapsibleWhitespace;
99  
100     private boolean trimmableWhitespace;
101 
102     private Map<String, String> entities;
103 
104     private boolean validate = false;
105 
106     /** {@inheritDoc} */
107     public void parse( Reader source, Sink sink )
108         throws ParseException
109     {
110         init();
111 
112         Reader src = source;
113 
114         // 1 first parsing if validation is required
115         if ( isValidate() )
116         {
117             String content;
118             try
119             {
120                 content = IOUtil.toString( new BufferedReader( src ) );
121             }
122             catch ( IOException e )
123             {
124                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
125             }
126 
127             new XmlValidator( getLog() ).validate( content );
128 
129             src = new StringReader( content );
130         }
131 
132         // 2 second parsing to process
133         try
134         {
135             XmlPullParser parser = new MXParser();
136 
137             parser.setInput( src );
138             
139             // allow parser initialization, e.g. for additional entities in XHTML
140             // Note: do it after input is set, otherwise values are reset
141             initXmlParser( parser );
142 
143             sink.enableLogging( getLog() );
144 
145             parseXml( parser, sink );
146         }
147         catch ( XmlPullParserException ex )
148         {
149             throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
150                                       ex.getColumnNumber() );
151         }
152         catch ( MacroExecutionException ex )
153         {
154             throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
155         }
156 
157         setSecondParsing( false );
158         init();
159     }
160     
161     /**
162      * Initializes the parser with custom entities or other options.
163      *
164      * @param parser A parser, not null.
165      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
166      */
167     protected void initXmlParser( XmlPullParser parser )
168         throws XmlPullParserException
169     {
170         // nop
171     }
172 
173     /**
174      * {@inheritDoc}
175      *
176      * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
177      */
178     @Override
179     public void parse( String string, Sink sink )
180         throws ParseException
181     {
182         super.parse( string, sink );
183     }
184 
185     /** {@inheritDoc} */
186     @Override
187     public final int getType()
188     {
189         return XML_TYPE;
190     }
191 
192     /**
193      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
194      *
195      * @param parser A parser, not null.
196      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
197      * @since 1.1
198      */
199     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
200     {
201         int count = parser.getAttributeCount();
202 
203         if ( count < 0 )
204         {
205             return null;
206         }
207 
208         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
209 
210         for ( int i = 0; i < count; i++ )
211         {
212             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
213         }
214 
215         return atts;
216     }
217 
218     /**
219      * Parse the model from the XmlPullParser into the given sink.
220      *
221      * @param parser A parser, not null.
222      * @param sink the sink to receive the events.
223      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
224      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
225      */
226     private void parseXml( XmlPullParser parser, Sink sink )
227         throws XmlPullParserException, MacroExecutionException
228     {
229         int eventType = parser.getEventType();
230 
231         while ( eventType != XmlPullParser.END_DOCUMENT )
232         {
233             if ( eventType == XmlPullParser.START_TAG )
234             {
235                 handleStartTag( parser, sink );
236             }
237             else if ( eventType == XmlPullParser.END_TAG )
238             {
239                 handleEndTag( parser, sink );
240             }
241             else if ( eventType == XmlPullParser.TEXT )
242             {
243                 String text = getText( parser );
244 
245                 if ( isIgnorableWhitespace() )
246                 {
247                     if ( text.trim().length() != 0 )
248                     {
249                         handleText( parser, sink );
250                     }
251                 }
252                 else
253                 {
254                     handleText( parser, sink );
255                 }
256             }
257             else if ( eventType == XmlPullParser.CDSECT )
258             {
259                 handleCdsect( parser, sink );
260             }
261             else if ( eventType == XmlPullParser.COMMENT )
262             {
263                 handleComment( parser, sink );
264             }
265             else if ( eventType == XmlPullParser.ENTITY_REF )
266             {
267                 handleEntity( parser, sink );
268             }
269             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
270             {
271                 // nop
272             }
273             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
274             {
275                 // nop
276             }
277             else if ( eventType == XmlPullParser.DOCDECL )
278             {
279                 addLocalEntities( parser, parser.getText() );
280 
281                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
282                 {
283                     addDTDEntities( parser, new String( res ) );
284                 }
285             }
286 
287             try
288             {
289                 eventType = parser.nextToken();
290             }
291             catch ( IOException io )
292             {
293                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
294             }
295         }
296     }
297 
    /**
     * Goes through the possible start tags.
     *
     * <p>Called by the event loop of this class for every start tag event; the parser is positioned on
     * that start tag.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;
308 
    /**
     * Goes through the possible end tags.
     *
     * <p>Called by the event loop of this class for every end tag event; the parser is positioned on
     * that end tag.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;
319 
320     /**
321      * Handles text events.
322      *
323      * <p>This is a default implementation, if the parser points to a non-empty text element,
324      * it is emitted as a text event into the specified sink.</p>
325      *
326      * @param parser A parser, not null.
327      * @param sink the sink to receive the events. Not null.
328      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
329      */
330     protected void handleText( XmlPullParser parser, Sink sink )
331         throws XmlPullParserException
332     {
333         String text = getText( parser );
334 
335         /*
336          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
337          * parser so any whitespace that makes it here is significant.
338          */
339         if ( StringUtils.isNotEmpty( text ) )
340         {
341             sink.text( text );
342         }
343     }
344 
345     /**
346      * Handles CDATA sections.
347      *
348      * <p>This is a default implementation, all data are emitted as text
349      * events into the specified sink.</p>
350      *
351      * @param parser A parser, not null.
352      * @param sink the sink to receive the events. Not null.
353      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354      */
355     protected void handleCdsect( XmlPullParser parser, Sink sink )
356         throws XmlPullParserException
357     {
358         sink.text( getText( parser ) );
359     }
360 
361     /**
362      * Handles comments.
363      *
364      * <p>This is a default implementation, all data are emitted as comment
365      * events into the specified sink.</p>
366      *
367      * @param parser A parser, not null.
368      * @param sink the sink to receive the events. Not null.
369      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
370      */
371     protected void handleComment( XmlPullParser parser, Sink sink )
372         throws XmlPullParserException
373     {
374         if ( isEmitComments() )
375         {
376             sink.comment( getText( parser ) );
377         }
378     }
379 
380     /**
381      * Handles entities.
382      *
383      * <p>This is a default implementation, all entities are resolved and emitted as text
384      * events into the specified sink, except:</p>
385      * <ul>
386      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
387      * are emitted as <code>nonBreakingSpace()</code> events.</li>
388      * </ul>
389      *
390      * @param parser A parser, not null.
391      * @param sink the sink to receive the events. Not null.
392      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
393      */
394     protected void handleEntity( XmlPullParser parser, Sink sink )
395         throws XmlPullParserException
396     {
397         String text = getText( parser );
398 
399         String name = parser.getName();
400 
401         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
402         {
403             sink.nonBreakingSpace();
404         }
405         else
406         {
407             String unescaped = HtmlTools.unescapeHTML( text );
408 
409             sink.text( unescaped );
410         }
411     }
412 
413     /**
414      * Handles an unknown event.
415      *
416      * <p>This is a default implementation, all events are emitted as unknown
417      * events into the specified sink.</p>
418      *
419      * @param parser the parser to get the event from.
420      * @param sink the sink to receive the event.
421      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
422      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
423      * It will be passed as the first argument of the required parameters to the Sink
424      * {@link
425      * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
426      * method.
427      */
428     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
429     {
430         Object[] required = new Object[] { Integer.valueOf( type ) };
431 
432         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
433 
434         sink.unknown( parser.getName(), required, attribs );
435     }
436 
437     /**
438      * <p>isIgnorableWhitespace.</p>
439      *
440      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
441      * @see #setIgnorableWhitespace(boolean)
442      * @since 1.1
443      */
444     protected boolean isIgnorableWhitespace()
445     {
446         return ignorableWhitespace;
447     }
448 
449     /**
450      * Specify that whitespace will be ignored. I.e.:
451      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
452      * is equivalent to
453      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
454      *
455      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
456      * @since 1.1
457      */
458     protected void setIgnorableWhitespace( boolean ignorable )
459     {
460         this.ignorableWhitespace = ignorable;
461     }
462 
463     /**
464      * <p>isCollapsibleWhitespace.</p>
465      *
466      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
467      * @see #setCollapsibleWhitespace(boolean)
468      * @since 1.1
469      */
470     protected boolean isCollapsibleWhitespace()
471     {
472         return collapsibleWhitespace;
473     }
474 
475     /**
476      * Specify that text will be collapsed. I.e.:
477      * <pre>Text   Text</pre>
478      * is equivalent to
479      * <pre>Text Text</pre>
480      *
481      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
482      * @since 1.1
483      */
484     protected void setCollapsibleWhitespace( boolean collapsible )
485     {
486         this.collapsibleWhitespace = collapsible;
487     }
488 
489     /**
490      * <p>isTrimmableWhitespace.</p>
491      *
492      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
493      * @see #setTrimmableWhitespace(boolean)
494      * @since 1.1
495      */
496     protected boolean isTrimmableWhitespace()
497     {
498         return trimmableWhitespace;
499     }
500 
501     /**
502      * Specify that text will be collapsed. I.e.:
503      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
504      * is equivalent to
505      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
506      *
507      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
508      * @since 1.1
509      */
510     protected void setTrimmableWhitespace( boolean trimmable )
511     {
512         this.trimmableWhitespace = trimmable;
513     }
514 
515     /**
516      * <p>getText.</p>
517      *
518      * @param parser A parser, not null.
519      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
520      * @see XmlPullParser#getText()
521      * @see #isCollapsibleWhitespace()
522      * @see #isTrimmableWhitespace()
523      * @since 1.1
524      */
525     protected String getText( XmlPullParser parser )
526     {
527         String text = parser.getText();
528 
529         if ( isTrimmableWhitespace() )
530         {
531             text = text.trim();
532         }
533 
534         if ( isCollapsibleWhitespace() )
535         {
536             StringBuilder newText = new StringBuilder();
537             String[] elts = StringUtils.split( text, " \r\n" );
538             for ( int i = 0; i < elts.length; i++ )
539             {
540                 newText.append( elts[i] );
541                 if ( ( i + 1 ) < elts.length )
542                 {
543                     newText.append( " " );
544                 }
545             }
546             text = newText.toString();
547         }
548 
549         return text;
550     }
551 
552     /**
553      * Return the defined entities in a local doctype. I.e.:
554      * <pre>
555      * &lt;!DOCTYPE foo [
556      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
557      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
558      * ]&gt;
559      * </pre>
560      *
561      * @return a map of the defined entities in a local doctype.
562      * @since 1.1
563      */
564     protected Map<String, String> getLocalEntities()
565     {
566         if ( entities == null )
567         {
568             entities = new LinkedHashMap<String, String>();
569         }
570 
571         return entities;
572     }
573 
574     /**
575      * <p>isValidate.</p>
576      *
577      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
578      * @since 1.1
579      */
580     public boolean isValidate()
581     {
582         return validate;
583     }
584 
585     /**
586      * Specify a flag to validate or not the XML content.
587      *
588      * @param validate the validate to set
589      * @see #parse(Reader, Sink)
590      * @since 1.1
591      */
592     public void setValidate( boolean validate )
593     {
594         this.validate = validate;
595     }
596 
597     // ----------------------------------------------------------------------
598     // Private methods
599     // ----------------------------------------------------------------------
600 
601     /**
602      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
603      * <br/>
604      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
605      *
606      * @param parser not null
607      * @param entityName not null
608      * @param entityValue not null
609      * @throws XmlPullParserException if any
610      * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
611      */
612     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
613         throws XmlPullParserException
614     {
615         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
616             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
617         {
618             return;
619         }
620 
621         parser.defineEntityReplacementText( entityName, entityValue );
622         getLocalEntities().put( entityName, entityValue );
623     }
624 
625     /**
626      * Handle entities defined in a local doctype as the following:
627      * <pre>
628      * &lt;!DOCTYPE foo [
629      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
630      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
631      * ]&gt;
632      * </pre>
633      *
634      * @param parser not null
635      * @param text not null
636      * @throws XmlPullParserException if any
637      */
638     private void addLocalEntities( XmlPullParser parser, String text )
639         throws XmlPullParserException
640     {
641         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
642         if ( entitiesCount > 0 )
643         {
644             // text should be foo [...]
645             int start = text.indexOf( '[' );
646             int end = text.lastIndexOf( ']' );
647             if ( start != -1 && end != -1 )
648             {
649                 addDTDEntities( parser, text.substring( start + 1, end ) );
650             }
651         }
652     }
653 
654     /**
655      * Handle entities defined in external doctypes as the following:
656      * <pre>
657      * &lt;!DOCTYPE foo [
658      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
659      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
660      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
661      *   %HTMLlat1;
662      * ]&gt;
663      * </pre>
664      *
665      * @param parser not null
666      * @param text not null
667      * @throws XmlPullParserException if any
668      */
669     private void addDTDEntities( XmlPullParser parser, String text )
670         throws XmlPullParserException
671     {
672         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
673         if ( entitiesCount > 0 )
674         {
675             final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
676             BufferedReader reader = new BufferedReader( new StringReader( txt ) );
677             String line;
678             String tmpLine = "";
679             try
680             {
681                 Matcher matcher;
682                 while ( ( line = reader.readLine() ) != null )
683                 {
684                     tmpLine += "\n" + line;
685                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
686                     if ( matcher.find() && matcher.groupCount() == 7 )
687                     {
688                         String entityName = matcher.group( 2 );
689                         String entityValue = matcher.group( 5 );
690 
691                         addEntity( parser, entityName, entityValue );
692                         tmpLine = "";
693                     }
694                     else
695                     {
696                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
697                         if ( matcher.find() && matcher.groupCount() == 8 )
698                         {
699                             String entityName = matcher.group( 2 );
700                             String entityValue = matcher.group( 5 );
701 
702                             addEntity( parser, entityName, entityValue );
703                             tmpLine = "";
704                         }
705                     }
706                 }
707             }
708             catch ( IOException e )
709             {
710                 // nop
711             }
712             finally
713             {
714                 IOUtil.close( reader );
715             }
716         }
717     }
718 
719     /**
720      * Implementation of the callback mechanism <code>EntityResolver</code>.
721      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
722      */
723     public static class CachedFileEntityResolver
724         implements EntityResolver
725     {
        /**
         * Map with systemId as key and the content of systemId as byte[].
         * The map is static, so it is shared by all resolver instances; entries are added by
         * <code>resolveEntity</code> and never removed in this class.
         */
        protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
728 
729         /** {@inheritDoc} */
730         public InputSource resolveEntity( String publicId, String systemId )
731             throws SAXException, IOException
732         {
733             byte[] res = ENTITY_CACHE.get( systemId );
734             // already cached?
735             if ( res == null )
736             {
737                 String systemName = FileUtils.getFile( systemId ).getName();
738                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
739                 // maybe already as a temp file?
740                 if ( !temp.exists() )
741                 {
742                     // is systemId a file or an url?
743                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
744                     {
745                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
746                         // the classpath...
747                         String resource = "/" + systemName;
748                         URL url = getClass().getResource( resource );
749                         if ( url != null )
750                         {
751                             res = toByteArray( url );
752                         }
753                         else
754                         {
755                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
756                             + " because '" + resource + "' is not available of the classpath." );
757                         }
758                     }
759                     else
760                     {
761                         res = toByteArray( new URL( systemId ) );
762                     }
763 
764                     // write systemId as temp file
765                     copy( res, temp );
766                 }
767                 else
768                 {
769                     // TODO How to refresh Doxia XSDs from temp dir?
770                     res = toByteArray( temp.toURI().toURL() );
771                 }
772 
773                 ENTITY_CACHE.put( systemId, res );
774             }
775 
776             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
777             is.setPublicId( publicId );
778             is.setSystemId( systemId );
779 
780             return is;
781         }
782 
783         /**
784          * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
785          * content.
786          * Otherwise, use HttpClient to get the http content.
787          * Wrap all internal exceptions to throw SAXException.
788          *
789          * @param url not null
790          * @return return an array of byte
791          * @throws SAXException if any
792          */
793         private static byte[] toByteArray( URL url )
794             throws SAXException
795         {
796             if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
797             {
798                 InputStream is = null;
799                 try
800                 {
801                     is = url.openStream();
802                     if ( is == null )
803                     {
804                         throw new SAXException( "Cannot open stream from the url: " + url.toString() );
805                     }
806                     return IOUtil.toByteArray( is );
807                 }
808                 catch ( IOException e )
809                 {
810                     throw new SAXException( "IOException: " + e.getMessage(), e );
811                 }
812                 finally
813                 {
814                     IOUtil.close( is );
815                 }
816             }
817 
818             // it is an HTTP url, using HttpClient...
819             DefaultHttpClient client = new DefaultHttpClient();
820             HttpGet method = new HttpGet( url.toString() );
821             // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
822             // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
823             method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
824 
825             HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
826             client.setHttpRequestRetryHandler( retryHandler );
827 
828             HttpEntity entity = null;
829             try
830             {
831                 HttpResponse response = client.execute( method );
832                 int statusCode = response.getStatusLine().getStatusCode();
833                 if ( statusCode != HttpStatus.SC_OK )
834                 {
835                     throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
836                         + statusCode + ", which is not allowed. The server gave this reason for the failure '"
837                         + response.getStatusLine().getReasonPhrase() + "'." );
838                 }
839 
840                 entity = response.getEntity();
841                 return EntityUtils.toByteArray( entity );
842             }
843             catch ( ClientProtocolException e )
844             {
845                 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
846             }
847             catch ( IOException e )
848             {
849                 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
850             }
851             finally
852             {
853                 if ( entity != null )
854                 {
855                     try
856                     {
857                         entity.consumeContent();
858                     }
859                     catch ( IOException e )
860                     {
861                         // Ignore
862                     }
863                 }
864             }
865         }
866 
867         /**
868          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
869          *
870          * @param res not null array of byte
871          * @param f the file where to write the bytes
872          * @throws SAXException if any
873          * @see {@link IOUtil#copy(byte[], OutputStream)}
874          */
875         private void copy( byte[] res, File f )
876             throws SAXException
877         {
878             if ( f.isDirectory() )
879             {
880                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
881             }
882 
883             OutputStream os = null;
884             try
885             {
886                 os = new FileOutputStream( f );
887                 IOUtil.copy( res, os );
888             }
889             catch ( IOException e )
890             {
891                 throw new SAXException( "IOException: " + e.getMessage(), e );
892             }
893             finally
894             {
895                 IOUtil.close( os );
896             }
897         }
898     }
899 }