001package org.apache.maven.doxia.parser;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.io.BufferedReader;
023import java.io.ByteArrayInputStream;
024import java.io.File;
025import java.io.FileOutputStream;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.OutputStream;
029import java.io.Reader;
030import java.io.StringReader;
031import java.net.URL;
032import java.util.Hashtable;
033import java.util.LinkedHashMap;
034import java.util.Locale;
035import java.util.Map;
036import java.util.regex.Matcher;
037import java.util.regex.Pattern;
038
039import org.apache.http.HttpEntity;
040import org.apache.http.HttpResponse;
041import org.apache.http.HttpStatus;
042import org.apache.http.client.ClientProtocolException;
043import org.apache.http.client.HttpRequestRetryHandler;
044import org.apache.http.client.methods.HttpGet;
045import org.apache.http.impl.client.DefaultHttpClient;
046import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
047import org.apache.http.util.EntityUtils;
048
049import org.apache.maven.doxia.macro.MacroExecutionException;
050import org.apache.maven.doxia.markup.XmlMarkup;
051import org.apache.maven.doxia.sink.Sink;
052import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
053import org.apache.maven.doxia.util.HtmlTools;
054import org.apache.maven.doxia.util.XmlValidator;
055
056import org.codehaus.plexus.util.FileUtils;
057import org.codehaus.plexus.util.IOUtil;
058import org.codehaus.plexus.util.StringUtils;
059import org.codehaus.plexus.util.xml.pull.MXParser;
060import org.codehaus.plexus.util.xml.pull.XmlPullParser;
061import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
062
063import org.xml.sax.EntityResolver;
064import org.xml.sax.InputSource;
065import org.xml.sax.SAXException;
066
067/**
068 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
069 *
070 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
071 * @version $Id: AbstractXmlParser.html 979316 2016-02-02 21:51:43Z hboutemy $
072 * @since 1.0
073 */
074public abstract class AbstractXmlParser
075    extends AbstractParser
076    implements XmlMarkup
077{
078    /**
079     * Entity pattern for HTML entity, i.e. &#38;nbsp;
080     * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
081     * <br/>
082     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
083     */
084    private static final Pattern PATTERN_ENTITY_1 =
085        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
086
087    /**
088     * Entity pattern for Unicode entity, i.e. &#38;#38;
089     * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
090     * <br/>
091     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
092     */
093    private static final Pattern PATTERN_ENTITY_2 =
094        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
095
096    private boolean ignorableWhitespace;
097
098    private boolean collapsibleWhitespace;
099
100    private boolean trimmableWhitespace;
101
102    private Map<String, String> entities;
103
104    private boolean validate = false;
105
106    /** {@inheritDoc} */
107    public void parse( Reader source, Sink sink )
108        throws ParseException
109    {
110        init();
111
112        Reader src = source;
113
114        // 1 first parsing if validation is required
115        if ( isValidate() )
116        {
117            String content;
118            try
119            {
120                content = IOUtil.toString( new BufferedReader( src ) );
121            }
122            catch ( IOException e )
123            {
124                throw new ParseException( "Error reading the model: " + e.getMessage(), e );
125            }
126
127            new XmlValidator( getLog() ).validate( content );
128
129            src = new StringReader( content );
130        }
131
132        // 2 second parsing to process
133        try
134        {
135            XmlPullParser parser = new MXParser();
136
137            parser.setInput( src );
138            
139            // allow parser initialization, e.g. for additional entities in XHTML
140            // Note: do it after input is set, otherwise values are reset
141            initXmlParser( parser );
142
143            sink.enableLogging( getLog() );
144
145            parseXml( parser, sink );
146        }
147        catch ( XmlPullParserException ex )
148        {
149            throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
150                                      ex.getColumnNumber() );
151        }
152        catch ( MacroExecutionException ex )
153        {
154            throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
155        }
156
157        setSecondParsing( false );
158        init();
159    }
160    
161    /**
162     * Initializes the parser with custom entities or other options.
163     *
164     * @param parser A parser, not null.
165     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
166     */
167    protected void initXmlParser( XmlPullParser parser )
168        throws XmlPullParserException
169    {
170        // nop
171    }
172
173    /**
174     * {@inheritDoc}
175     *
176     * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
177     */
178    @Override
179    public void parse( String string, Sink sink )
180        throws ParseException
181    {
182        super.parse( string, sink );
183    }
184
185    /** {@inheritDoc} */
186    @Override
187    public final int getType()
188    {
189        return XML_TYPE;
190    }
191
192    /**
193     * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
194     *
195     * @param parser A parser, not null.
196     * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
197     * @since 1.1
198     */
199    protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
200    {
201        int count = parser.getAttributeCount();
202
203        if ( count < 0 )
204        {
205            return null;
206        }
207
208        SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
209
210        for ( int i = 0; i < count; i++ )
211        {
212            atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
213        }
214
215        return atts;
216    }
217
218    /**
219     * Parse the model from the XmlPullParser into the given sink.
220     *
221     * @param parser A parser, not null.
222     * @param sink the sink to receive the events.
223     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
224     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
225     */
226    private void parseXml( XmlPullParser parser, Sink sink )
227        throws XmlPullParserException, MacroExecutionException
228    {
229        int eventType = parser.getEventType();
230
231        while ( eventType != XmlPullParser.END_DOCUMENT )
232        {
233            if ( eventType == XmlPullParser.START_TAG )
234            {
235                handleStartTag( parser, sink );
236            }
237            else if ( eventType == XmlPullParser.END_TAG )
238            {
239                handleEndTag( parser, sink );
240            }
241            else if ( eventType == XmlPullParser.TEXT )
242            {
243                String text = getText( parser );
244
245                if ( isIgnorableWhitespace() )
246                {
247                    if ( text.trim().length() != 0 )
248                    {
249                        handleText( parser, sink );
250                    }
251                }
252                else
253                {
254                    handleText( parser, sink );
255                }
256            }
257            else if ( eventType == XmlPullParser.CDSECT )
258            {
259                handleCdsect( parser, sink );
260            }
261            else if ( eventType == XmlPullParser.COMMENT )
262            {
263                handleComment( parser, sink );
264            }
265            else if ( eventType == XmlPullParser.ENTITY_REF )
266            {
267                handleEntity( parser, sink );
268            }
269            else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
270            {
271                // nop
272            }
273            else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
274            {
275                // nop
276            }
277            else if ( eventType == XmlPullParser.DOCDECL )
278            {
279                addLocalEntities( parser, parser.getText() );
280
281                for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
282                {
283                    addDTDEntities( parser, new String( res ) );
284                }
285            }
286
287            try
288            {
289                eventType = parser.nextToken();
290            }
291            catch ( IOException io )
292            {
293                throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
294            }
295        }
296    }
297
298    /**
299     * Goes through the possible start tags.
300     *
301     * @param parser A parser, not null.
302     * @param sink the sink to receive the events.
303     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
304     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
305     */
306    protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
307        throws XmlPullParserException, MacroExecutionException;
308
309    /**
310     * Goes through the possible end tags.
311     *
312     * @param parser A parser, not null.
313     * @param sink the sink to receive the events.
314     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
315     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
316     */
317    protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
318        throws XmlPullParserException, MacroExecutionException;
319
320    /**
321     * Handles text events.
322     *
323     * <p>This is a default implementation, if the parser points to a non-empty text element,
324     * it is emitted as a text event into the specified sink.</p>
325     *
326     * @param parser A parser, not null.
327     * @param sink the sink to receive the events. Not null.
328     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
329     */
330    protected void handleText( XmlPullParser parser, Sink sink )
331        throws XmlPullParserException
332    {
333        String text = getText( parser );
334
335        /*
336         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
337         * parser so any whitespace that makes it here is significant.
338         */
339        if ( StringUtils.isNotEmpty( text ) )
340        {
341            sink.text( text );
342        }
343    }
344
345    /**
346     * Handles CDATA sections.
347     *
348     * <p>This is a default implementation, all data are emitted as text
349     * events into the specified sink.</p>
350     *
351     * @param parser A parser, not null.
352     * @param sink the sink to receive the events. Not null.
353     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354     */
355    protected void handleCdsect( XmlPullParser parser, Sink sink )
356        throws XmlPullParserException
357    {
358        sink.text( getText( parser ) );
359    }
360
361    /**
362     * Handles comments.
363     *
364     * <p>This is a default implementation, all data are emitted as comment
365     * events into the specified sink.</p>
366     *
367     * @param parser A parser, not null.
368     * @param sink the sink to receive the events. Not null.
369     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
370     */
371    protected void handleComment( XmlPullParser parser, Sink sink )
372        throws XmlPullParserException
373    {
374        if ( isEmitComments() )
375        {
376            sink.comment( getText( parser ) );
377        }
378    }
379
380    /**
381     * Handles entities.
382     *
383     * <p>This is a default implementation, all entities are resolved and emitted as text
384     * events into the specified sink, except:</p>
385     * <ul>
386     * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
387     * are emitted as <code>nonBreakingSpace()</code> events.</li>
388     * </ul>
389     *
390     * @param parser A parser, not null.
391     * @param sink the sink to receive the events. Not null.
392     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
393     */
394    protected void handleEntity( XmlPullParser parser, Sink sink )
395        throws XmlPullParserException
396    {
397        String text = getText( parser );
398
399        String name = parser.getName();
400
401        if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
402        {
403            sink.nonBreakingSpace();
404        }
405        else
406        {
407            String unescaped = HtmlTools.unescapeHTML( text );
408
409            sink.text( unescaped );
410        }
411    }
412
413    /**
414     * Handles an unknown event.
415     *
416     * <p>This is a default implementation, all events are emitted as unknown
417     * events into the specified sink.</p>
418     *
419     * @param parser the parser to get the event from.
420     * @param sink the sink to receive the event.
421     * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
422     * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
423     * It will be passed as the first argument of the required parameters to the Sink
424     * {@link
425     * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
426     * method.
427     */
428    protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
429    {
430        Object[] required = new Object[] { Integer.valueOf( type ) };
431
432        SinkEventAttributeSet attribs = getAttributesFromParser( parser );
433
434        sink.unknown( parser.getName(), required, attribs );
435    }
436
437    /**
438     * <p>isIgnorableWhitespace.</p>
439     *
440     * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
441     * @see #setIgnorableWhitespace(boolean)
442     * @since 1.1
443     */
444    protected boolean isIgnorableWhitespace()
445    {
446        return ignorableWhitespace;
447    }
448
449    /**
450     * Specify that whitespace will be ignored. I.e.:
451     * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
452     * is equivalent to
453     * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
454     *
455     * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
456     * @since 1.1
457     */
458    protected void setIgnorableWhitespace( boolean ignorable )
459    {
460        this.ignorableWhitespace = ignorable;
461    }
462
463    /**
464     * <p>isCollapsibleWhitespace.</p>
465     *
466     * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
467     * @see #setCollapsibleWhitespace(boolean)
468     * @since 1.1
469     */
470    protected boolean isCollapsibleWhitespace()
471    {
472        return collapsibleWhitespace;
473    }
474
475    /**
476     * Specify that text will be collapsed. I.e.:
477     * <pre>Text   Text</pre>
478     * is equivalent to
479     * <pre>Text Text</pre>
480     *
481     * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
482     * @since 1.1
483     */
484    protected void setCollapsibleWhitespace( boolean collapsible )
485    {
486        this.collapsibleWhitespace = collapsible;
487    }
488
489    /**
490     * <p>isTrimmableWhitespace.</p>
491     *
492     * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
493     * @see #setTrimmableWhitespace(boolean)
494     * @since 1.1
495     */
496    protected boolean isTrimmableWhitespace()
497    {
498        return trimmableWhitespace;
499    }
500
501    /**
502     * Specify that text will be collapsed. I.e.:
503     * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
504     * is equivalent to
505     * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
506     *
507     * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
508     * @since 1.1
509     */
510    protected void setTrimmableWhitespace( boolean trimmable )
511    {
512        this.trimmableWhitespace = trimmable;
513    }
514
515    /**
516     * <p>getText.</p>
517     *
518     * @param parser A parser, not null.
519     * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
520     * @see XmlPullParser#getText()
521     * @see #isCollapsibleWhitespace()
522     * @see #isTrimmableWhitespace()
523     * @since 1.1
524     */
525    protected String getText( XmlPullParser parser )
526    {
527        String text = parser.getText();
528
529        if ( isTrimmableWhitespace() )
530        {
531            text = text.trim();
532        }
533
534        if ( isCollapsibleWhitespace() )
535        {
536            StringBuilder newText = new StringBuilder();
537            String[] elts = StringUtils.split( text, " \r\n" );
538            for ( int i = 0; i < elts.length; i++ )
539            {
540                newText.append( elts[i] );
541                if ( ( i + 1 ) < elts.length )
542                {
543                    newText.append( " " );
544                }
545            }
546            text = newText.toString();
547        }
548
549        return text;
550    }
551
552    /**
553     * Return the defined entities in a local doctype. I.e.:
554     * <pre>
555     * &lt;!DOCTYPE foo [
556     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
557     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
558     * ]&gt;
559     * </pre>
560     *
561     * @return a map of the defined entities in a local doctype.
562     * @since 1.1
563     */
564    protected Map<String, String> getLocalEntities()
565    {
566        if ( entities == null )
567        {
568            entities = new LinkedHashMap<String, String>();
569        }
570
571        return entities;
572    }
573
574    /**
575     * <p>isValidate.</p>
576     *
577     * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
578     * @since 1.1
579     */
580    public boolean isValidate()
581    {
582        return validate;
583    }
584
585    /**
586     * Specify a flag to validate or not the XML content.
587     *
588     * @param validate the validate to set
589     * @see #parse(Reader, Sink)
590     * @since 1.1
591     */
592    public void setValidate( boolean validate )
593    {
594        this.validate = validate;
595    }
596
597    // ----------------------------------------------------------------------
598    // Private methods
599    // ----------------------------------------------------------------------
600
601    /**
602     * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
603     * <br/>
604     * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
605     *
606     * @param parser not null
607     * @param entityName not null
608     * @param entityValue not null
609     * @throws XmlPullParserException if any
610     * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
611     */
612    private void addEntity( XmlPullParser parser, String entityName, String entityValue )
613        throws XmlPullParserException
614    {
615        if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
616            || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
617        {
618            return;
619        }
620
621        parser.defineEntityReplacementText( entityName, entityValue );
622        getLocalEntities().put( entityName, entityValue );
623    }
624
625    /**
626     * Handle entities defined in a local doctype as the following:
627     * <pre>
628     * &lt;!DOCTYPE foo [
629     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
630     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
631     * ]&gt;
632     * </pre>
633     *
634     * @param parser not null
635     * @param text not null
636     * @throws XmlPullParserException if any
637     */
638    private void addLocalEntities( XmlPullParser parser, String text )
639        throws XmlPullParserException
640    {
641        int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
642        if ( entitiesCount > 0 )
643        {
644            // text should be foo [...]
645            int start = text.indexOf( '[' );
646            int end = text.lastIndexOf( ']' );
647            if ( start != -1 && end != -1 )
648            {
649                addDTDEntities( parser, text.substring( start + 1, end ) );
650            }
651        }
652    }
653
654    /**
655     * Handle entities defined in external doctypes as the following:
656     * <pre>
657     * &lt;!DOCTYPE foo [
658     *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
659     *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
660     *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
661     *   %HTMLlat1;
662     * ]&gt;
663     * </pre>
664     *
665     * @param parser not null
666     * @param text not null
667     * @throws XmlPullParserException if any
668     */
669    private void addDTDEntities( XmlPullParser parser, String text )
670        throws XmlPullParserException
671    {
672        int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
673        if ( entitiesCount > 0 )
674        {
675            final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
676            BufferedReader reader = new BufferedReader( new StringReader( txt ) );
677            String line;
678            String tmpLine = "";
679            try
680            {
681                Matcher matcher;
682                while ( ( line = reader.readLine() ) != null )
683                {
684                    tmpLine += "\n" + line;
685                    matcher = PATTERN_ENTITY_1.matcher( tmpLine );
686                    if ( matcher.find() && matcher.groupCount() == 7 )
687                    {
688                        String entityName = matcher.group( 2 );
689                        String entityValue = matcher.group( 5 );
690
691                        addEntity( parser, entityName, entityValue );
692                        tmpLine = "";
693                    }
694                    else
695                    {
696                        matcher = PATTERN_ENTITY_2.matcher( tmpLine );
697                        if ( matcher.find() && matcher.groupCount() == 8 )
698                        {
699                            String entityName = matcher.group( 2 );
700                            String entityValue = matcher.group( 5 );
701
702                            addEntity( parser, entityName, entityValue );
703                            tmpLine = "";
704                        }
705                    }
706                }
707            }
708            catch ( IOException e )
709            {
710                // nop
711            }
712            finally
713            {
714                IOUtil.close( reader );
715            }
716        }
717    }
718
719    /**
720     * Implementation of the callback mechanism <code>EntityResolver</code>.
721     * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
722     */
723    public static class CachedFileEntityResolver
724        implements EntityResolver
725    {
726        /** Map with systemId as key and the content of systemId as byte[]. */
727        protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
728
729        /** {@inheritDoc} */
730        public InputSource resolveEntity( String publicId, String systemId )
731            throws SAXException, IOException
732        {
733            byte[] res = ENTITY_CACHE.get( systemId );
734            // already cached?
735            if ( res == null )
736            {
737                String systemName = FileUtils.getFile( systemId ).getName();
738                File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
739                // maybe already as a temp file?
740                if ( !temp.exists() )
741                {
742                    // is systemId a file or an url?
743                    if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
744                    {
745                        // Doxia XSDs are included in the jars, so try to find the resource systemName from
746                        // the classpath...
747                        String resource = "/" + systemName;
748                        URL url = getClass().getResource( resource );
749                        if ( url != null )
750                        {
751                            res = toByteArray( url );
752                        }
753                        else
754                        {
755                            throw new SAXException( "Could not find the SYSTEM entity: " + systemId
756                            + " because '" + resource + "' is not available of the classpath." );
757                        }
758                    }
759                    else
760                    {
761                        res = toByteArray( new URL( systemId ) );
762                    }
763
764                    // write systemId as temp file
765                    copy( res, temp );
766                }
767                else
768                {
769                    // TODO How to refresh Doxia XSDs from temp dir?
770                    res = toByteArray( temp.toURI().toURL() );
771                }
772
773                ENTITY_CACHE.put( systemId, res );
774            }
775
776            InputSource is = new InputSource( new ByteArrayInputStream( res ) );
777            is.setPublicId( publicId );
778            is.setSystemId( systemId );
779
780            return is;
781        }
782
783        /**
784         * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
785         * content.
786         * Otherwise, use HttpClient to get the http content.
787         * Wrap all internal exceptions to throw SAXException.
788         *
789         * @param url not null
790         * @return return an array of byte
791         * @throws SAXException if any
792         */
793        private static byte[] toByteArray( URL url )
794            throws SAXException
795        {
796            if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
797            {
798                InputStream is = null;
799                try
800                {
801                    is = url.openStream();
802                    if ( is == null )
803                    {
804                        throw new SAXException( "Cannot open stream from the url: " + url.toString() );
805                    }
806                    return IOUtil.toByteArray( is );
807                }
808                catch ( IOException e )
809                {
810                    throw new SAXException( "IOException: " + e.getMessage(), e );
811                }
812                finally
813                {
814                    IOUtil.close( is );
815                }
816            }
817
818            // it is an HTTP url, using HttpClient...
819            DefaultHttpClient client = new DefaultHttpClient();
820            HttpGet method = new HttpGet( url.toString() );
821            // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
822            // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
823            method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
824
825            HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
826            client.setHttpRequestRetryHandler( retryHandler );
827
828            HttpEntity entity = null;
829            try
830            {
831                HttpResponse response = client.execute( method );
832                int statusCode = response.getStatusLine().getStatusCode();
833                if ( statusCode != HttpStatus.SC_OK )
834                {
835                    throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
836                        + statusCode + ", which is not allowed. The server gave this reason for the failure '"
837                        + response.getStatusLine().getReasonPhrase() + "'." );
838                }
839
840                entity = response.getEntity();
841                return EntityUtils.toByteArray( entity );
842            }
843            catch ( ClientProtocolException e )
844            {
845                throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
846            }
847            catch ( IOException e )
848            {
849                throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
850            }
851            finally
852            {
853                if ( entity != null )
854                {
855                    try
856                    {
857                        entity.consumeContent();
858                    }
859                    catch ( IOException e )
860                    {
861                        // Ignore
862                    }
863                }
864            }
865        }
866
867        /**
868         * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
869         *
870         * @param res not null array of byte
871         * @param f the file where to write the bytes
872         * @throws SAXException if any
873         * @see {@link IOUtil#copy(byte[], OutputStream)}
874         */
875        private void copy( byte[] res, File f )
876            throws SAXException
877        {
878            if ( f.isDirectory() )
879            {
880                throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
881            }
882
883            OutputStream os = null;
884            try
885            {
886                os = new FileOutputStream( f );
887                IOUtil.copy( res, os );
888            }
889            catch ( IOException e )
890            {
891                throw new SAXException( "IOException: " + e.getMessage(), e );
892            }
893            finally
894            {
895                IOUtil.close( os );
896            }
897        }
898    }
899}