View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedReader;
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.net.URL;
32  import java.util.Hashtable;
33  import java.util.LinkedHashMap;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  import org.apache.maven.doxia.macro.MacroExecutionException;
40  import org.apache.maven.doxia.markup.XmlMarkup;
41  import org.apache.maven.doxia.sink.Sink;
42  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
43  import org.apache.maven.doxia.util.HtmlTools;
44  import org.apache.maven.doxia.util.XmlValidator;
45  
46  import org.codehaus.plexus.util.FileUtils;
47  import org.codehaus.plexus.util.IOUtil;
48  import org.codehaus.plexus.util.StringUtils;
49  import org.codehaus.plexus.util.xml.pull.MXParser;
50  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
51  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
52  
53  import org.xml.sax.EntityResolver;
54  import org.xml.sax.InputSource;
55  import org.xml.sax.SAXException;
56  
57  /**
58   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
59   *
60   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
61   * @since 1.0
62   */
63  public abstract class AbstractXmlParser
64      extends AbstractParser
65      implements XmlMarkup
66  {
67      /**
68       * Entity pattern for HTML entity, i.e. &#38;nbsp;
69       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
70       * <br>
71       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
72       */
73      private static final Pattern PATTERN_ENTITY_1 =
74          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
75  
76      /**
77       * Entity pattern for Unicode entity, i.e. &#38;#38;
78       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
79       * <br>
80       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
81       */
82      private static final Pattern PATTERN_ENTITY_2 =
83          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
84  
85      private boolean ignorableWhitespace;
86  
87      private boolean collapsibleWhitespace;
88  
89      private boolean trimmableWhitespace;
90  
91      private Map<String, String> entities;
92  
93      private boolean validate = false;
94  
95      /** {@inheritDoc} */
96      public void parse( Reader source, Sink sink, String reference )
97          throws ParseException
98      {
99          init();
100 
101         Reader src = source;
102 
103         // 1 first parsing if validation is required
104         if ( isValidate() )
105         {
106             String content;
107             try
108             {
109                 content = IOUtil.toString( new BufferedReader( src ) );
110             }
111             catch ( IOException e )
112             {
113                 throw new ParseException( "Error reading the model", e );
114             }
115 
116             new XmlValidator( ).validate( content );
117 
118             src = new StringReader( content );
119         }
120 
121         // 2 second parsing to process
122         try
123         {
124             XmlPullParser parser = new MXParser();
125 
126             parser.setInput( src );
127 
128             // allow parser initialization, e.g. for additional entities in XHTML
129             // Note: do it after input is set, otherwise values are reset
130             initXmlParser( parser );
131 
132             parseXml( parser, sink );
133         }
134         catch ( XmlPullParserException ex )
135         {
136             throw new ParseException( "Error parsing the model", ex, ex.getLineNumber(),
137                                       ex.getColumnNumber() );
138         }
139         catch ( MacroExecutionException ex )
140         {
141             throw new ParseException( "Macro execution failed", ex );
142         }
143 
144         setSecondParsing( false );
145         init();
146     }
147 
148     /**
149      * Initializes the parser with custom entities or other options.
150      *
151      * @param parser A parser, not null.
152      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
153      */
154     protected void initXmlParser( XmlPullParser parser )
155         throws XmlPullParserException
156     {
157         // nop
158     }
159 
160     /** {@inheritDoc} */
161     @Override
162     public final int getType()
163     {
164         return XML_TYPE;
165     }
166 
167     /**
168      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
169      *
170      * @param parser A parser, not null.
171      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
172      * @since 1.1
173      */
174     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
175     {
176         int count = parser.getAttributeCount();
177 
178         if ( count < 0 )
179         {
180             return null;
181         }
182 
183         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
184 
185         for ( int i = 0; i < count; i++ )
186         {
187             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
188         }
189 
190         return atts;
191     }
192 
193     /**
194      * Parse the model from the XmlPullParser into the given sink.
195      *
196      * @param parser A parser, not null.
197      * @param sink the sink to receive the events.
198      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
199      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
200      */
201     private void parseXml( XmlPullParser parser, Sink sink )
202         throws XmlPullParserException, MacroExecutionException
203     {
204         int eventType = parser.getEventType();
205 
206         while ( eventType != XmlPullParser.END_DOCUMENT )
207         {
208             if ( eventType == XmlPullParser.START_TAG )
209             {
210                 handleStartTag( parser, sink );
211             }
212             else if ( eventType == XmlPullParser.END_TAG )
213             {
214                 handleEndTag( parser, sink );
215             }
216             else if ( eventType == XmlPullParser.TEXT )
217             {
218                 String text = getText( parser );
219 
220                 if ( isIgnorableWhitespace() )
221                 {
222                     if ( text.trim().length() != 0 )
223                     {
224                         handleText( parser, sink );
225                     }
226                 }
227                 else
228                 {
229                     handleText( parser, sink );
230                 }
231             }
232             else if ( eventType == XmlPullParser.CDSECT )
233             {
234                 handleCdsect( parser, sink );
235             }
236             else if ( eventType == XmlPullParser.COMMENT )
237             {
238                 handleComment( parser, sink );
239             }
240             else if ( eventType == XmlPullParser.ENTITY_REF )
241             {
242                 handleEntity( parser, sink );
243             }
244             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
245             {
246                 // nop
247             }
248             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
249             {
250                 // nop
251             }
252             else if ( eventType == XmlPullParser.DOCDECL )
253             {
254                 addLocalEntities( parser, parser.getText() );
255 
256                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
257                 {
258                     addDTDEntities( parser, new String( res ) );
259                 }
260             }
261 
262             try
263             {
264                 eventType = parser.nextToken();
265             }
266             catch ( IOException io )
267             {
268                 // Does not have a cause arg
269                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
270             }
271         }
272     }
273 
274     /**
275      * Goes through the possible start tags.
276      *
277      * @param parser A parser, not null.
278      * @param sink the sink to receive the events.
279      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
280      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
281      */
282     protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
283         throws XmlPullParserException, MacroExecutionException;
284 
285     /**
286      * Goes through the possible end tags.
287      *
288      * @param parser A parser, not null.
289      * @param sink the sink to receive the events.
290      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
291      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
292      */
293     protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
294         throws XmlPullParserException, MacroExecutionException;
295 
296     /**
297      * Handles text events.
298      *
299      * <p>This is a default implementation, if the parser points to a non-empty text element,
300      * it is emitted as a text event into the specified sink.</p>
301      *
302      * @param parser A parser, not null.
303      * @param sink the sink to receive the events. Not null.
304      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
305      */
306     protected void handleText( XmlPullParser parser, Sink sink )
307         throws XmlPullParserException
308     {
309         String text = getText( parser );
310 
311         /*
312          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
313          * parser so any whitespace that makes it here is significant.
314          */
315         if ( StringUtils.isNotEmpty( text ) )
316         {
317             sink.text( text );
318         }
319     }
320 
321     /**
322      * Handles CDATA sections.
323      *
324      * <p>This is a default implementation, all data are emitted as text
325      * events into the specified sink.</p>
326      *
327      * @param parser A parser, not null.
328      * @param sink the sink to receive the events. Not null.
329      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
330      */
331     protected void handleCdsect( XmlPullParser parser, Sink sink )
332         throws XmlPullParserException
333     {
334         sink.text( getText( parser ) );
335     }
336 
337     /**
338      * Handles comments.
339      *
340      * <p>This is a default implementation, all data are emitted as comment
341      * events into the specified sink.</p>
342      *
343      * @param parser A parser, not null.
344      * @param sink the sink to receive the events. Not null.
345      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
346      */
347     protected void handleComment( XmlPullParser parser, Sink sink )
348         throws XmlPullParserException
349     {
350         if ( isEmitComments() )
351         {
352             sink.comment( getText( parser ) );
353         }
354     }
355 
356     /**
357      * Handles entities.
358      *
359      * <p>This is a default implementation, all entities are resolved and emitted as text
360      * events into the specified sink, except:</p>
361      * <ul>
362      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
363      * are emitted as <code>nonBreakingSpace()</code> events.</li>
364      * </ul>
365      *
366      * @param parser A parser, not null.
367      * @param sink the sink to receive the events. Not null.
368      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
369      */
370     protected void handleEntity( XmlPullParser parser, Sink sink )
371         throws XmlPullParserException
372     {
373         String text = getText( parser );
374 
375         String name = parser.getName();
376 
377         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
378         {
379             sink.nonBreakingSpace();
380         }
381         else
382         {
383             String unescaped = HtmlTools.unescapeHTML( text );
384 
385             sink.text( unescaped );
386         }
387     }
388 
389     /**
390      * Handles an unknown event.
391      *
392      * <p>This is a default implementation, all events are emitted as unknown
393      * events into the specified sink.</p>
394      *
395      * @param parser the parser to get the event from.
396      * @param sink the sink to receive the event.
397      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
398      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
399      * It will be passed as the first argument of the required parameters to the Sink
400      * {@link
401      * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
402      * method.
403      */
404     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
405     {
406         Object[] required = new Object[] { type };
407 
408         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
409 
410         sink.unknown( parser.getName(), required, attribs );
411     }
412 
413     /**
414      * <p>isIgnorableWhitespace.</p>
415      *
416      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
417      * @see #setIgnorableWhitespace(boolean)
418      * @since 1.1
419      */
420     protected boolean isIgnorableWhitespace()
421     {
422         return ignorableWhitespace;
423     }
424 
425     /**
426      * Specify that whitespace will be ignored. I.e.:
427      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
428      * is equivalent to
429      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
430      *
431      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
432      * @since 1.1
433      */
434     protected void setIgnorableWhitespace( boolean ignorable )
435     {
436         this.ignorableWhitespace = ignorable;
437     }
438 
439     /**
440      * <p>isCollapsibleWhitespace.</p>
441      *
442      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
443      * @see #setCollapsibleWhitespace(boolean)
444      * @since 1.1
445      */
446     protected boolean isCollapsibleWhitespace()
447     {
448         return collapsibleWhitespace;
449     }
450 
451     /**
452      * Specify that text will be collapsed. I.e.:
453      * <pre>Text   Text</pre>
454      * is equivalent to
455      * <pre>Text Text</pre>
456      *
457      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
458      * @since 1.1
459      */
460     protected void setCollapsibleWhitespace( boolean collapsible )
461     {
462         this.collapsibleWhitespace = collapsible;
463     }
464 
465     /**
466      * <p>isTrimmableWhitespace.</p>
467      *
468      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
469      * @see #setTrimmableWhitespace(boolean)
470      * @since 1.1
471      */
472     protected boolean isTrimmableWhitespace()
473     {
474         return trimmableWhitespace;
475     }
476 
477     /**
478      * Specify that text will be collapsed. I.e.:
479      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
480      * is equivalent to
481      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
482      *
483      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
484      * @since 1.1
485      */
486     protected void setTrimmableWhitespace( boolean trimmable )
487     {
488         this.trimmableWhitespace = trimmable;
489     }
490 
491     /**
492      * <p>getText.</p>
493      *
494      * @param parser A parser, not null.
495      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
496      * @see XmlPullParser#getText()
497      * @see #isCollapsibleWhitespace()
498      * @see #isTrimmableWhitespace()
499      * @since 1.1
500      */
501     protected String getText( XmlPullParser parser )
502     {
503         String text = parser.getText();
504 
505         if ( isTrimmableWhitespace() )
506         {
507             text = text.trim();
508         }
509 
510         if ( isCollapsibleWhitespace() )
511         {
512             StringBuilder newText = new StringBuilder();
513             String[] elts = StringUtils.split( text, " \r\n" );
514             for ( int i = 0; i < elts.length; i++ )
515             {
516                 newText.append( elts[i] );
517                 if ( ( i + 1 ) < elts.length )
518                 {
519                     newText.append( " " );
520                 }
521             }
522             text = newText.toString();
523         }
524 
525         return text;
526     }
527 
528     /**
529      * Return the defined entities in a local doctype. I.e.:
530      * <pre>
531      * &lt;!DOCTYPE foo [
532      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
533      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
534      * ]&gt;
535      * </pre>
536      *
537      * @return a map of the defined entities in a local doctype.
538      * @since 1.1
539      */
540     protected Map<String, String> getLocalEntities()
541     {
542         if ( entities == null )
543         {
544             entities = new LinkedHashMap<>();
545         }
546 
547         return entities;
548     }
549 
550     /**
551      * <p>isValidate.</p>
552      *
553      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
554      * @since 1.1
555      */
556     public boolean isValidate()
557     {
558         return validate;
559     }
560 
561     /**
562      * Specify a flag to validate or not the XML content.
563      *
564      * @param validate the validate to set
565      * @see #parse(Reader, Sink)
566      * @since 1.1
567      */
568     public void setValidate( boolean validate )
569     {
570         this.validate = validate;
571     }
572 
573     // ----------------------------------------------------------------------
574     // Private methods
575     // ----------------------------------------------------------------------
576 
577     /**
578      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
579      * <br>
580      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
581      *
582      * @param parser not null
583      * @param entityName not null
584      * @param entityValue not null
585      * @throws XmlPullParserException if any
586      * @see XmlPullParser#defineEntityReplacementText(String, String)
587      */
588     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
589         throws XmlPullParserException
590     {
591         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
592             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
593         {
594             return;
595         }
596 
597         parser.defineEntityReplacementText( entityName, entityValue );
598         getLocalEntities().put( entityName, entityValue );
599     }
600 
601     /**
602      * Handle entities defined in a local doctype as the following:
603      * <pre>
604      * &lt;!DOCTYPE foo [
605      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
606      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
607      * ]&gt;
608      * </pre>
609      *
610      * @param parser not null
611      * @param text not null
612      * @throws XmlPullParserException if any
613      */
614     private void addLocalEntities( XmlPullParser parser, String text )
615         throws XmlPullParserException
616     {
617         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
618         if ( entitiesCount > 0 )
619         {
620             // text should be foo [...]
621             int start = text.indexOf( '[' );
622             int end = text.lastIndexOf( ']' );
623             if ( start != -1 && end != -1 )
624             {
625                 addDTDEntities( parser, text.substring( start + 1, end ) );
626             }
627         }
628     }
629 
630     /**
631      * Handle entities defined in external doctypes as the following:
632      * <pre>
633      * &lt;!DOCTYPE foo [
634      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
635      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
636      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
637      *   %HTMLlat1;
638      * ]&gt;
639      * </pre>
640      *
641      * @param parser not null
642      * @param text not null
643      * @throws XmlPullParserException if any
644      */
645     private void addDTDEntities( XmlPullParser parser, String text )
646         throws XmlPullParserException
647     {
648         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
649         if ( entitiesCount > 0 )
650         {
651             final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
652             try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
653             {
654                 String line;
655                 String tmpLine = "";
656                 Matcher matcher;
657                 while ( ( line = reader.readLine() ) != null )
658                 {
659                     tmpLine += "\n" + line;
660                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
661                     if ( matcher.find() && matcher.groupCount() == 7 )
662                     {
663                         String entityName = matcher.group( 2 );
664                         String entityValue = matcher.group( 5 );
665 
666                         addEntity( parser, entityName, entityValue );
667                         tmpLine = "";
668                     }
669                     else
670                     {
671                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
672                         if ( matcher.find() && matcher.groupCount() == 8 )
673                         {
674                             String entityName = matcher.group( 2 );
675                             String entityValue = matcher.group( 5 );
676 
677                             addEntity( parser, entityName, entityValue );
678                             tmpLine = "";
679                         }
680                     }
681                 }
682             }
683             catch ( IOException e )
684             {
685                 // nop
686             }
687         }
688     }
689 
690     /**
691      * Implementation of the callback mechanism <code>EntityResolver</code>.
692      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
693      */
694     public static class CachedFileEntityResolver
695         implements EntityResolver
696     {
697         /** Map with systemId as key and the content of systemId as byte[]. */
698         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
699 
700         /** {@inheritDoc} */
701         public InputSource resolveEntity( String publicId, String systemId )
702             throws SAXException, IOException
703         {
704             byte[] res = ENTITY_CACHE.get( systemId );
705             // already cached?
706             if ( res == null )
707             {
708                 String systemName = FileUtils.getFile( systemId ).getName();
709                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
710                 // maybe already as a temp file?
711                 if ( !temp.exists() )
712                 {
713                     // is systemId a file or an url?
714                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
715                     {
716                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
717                         // the classpath...
718                         String resource = "/" + systemName;
719                         URL url = getClass().getResource( resource );
720                         if ( url != null )
721                         {
722                             res = toByteArray( url );
723                         }
724                         else
725                         {
726                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
727                             + " because '" + resource + "' is not available of the classpath." );
728                         }
729                     }
730                     else
731                     {
732                         res = toByteArray( new URL( systemId ) );
733                     }
734 
735                     // write systemId as temp file
736                     copy( res, temp );
737                 }
738                 else
739                 {
740                     // TODO How to refresh Doxia XSDs from temp dir?
741                     res = toByteArray( temp.toURI().toURL() );
742                 }
743 
744                 ENTITY_CACHE.put( systemId, res );
745             }
746 
747             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
748             is.setPublicId( publicId );
749             is.setSystemId( systemId );
750 
751             return is;
752         }
753 
754         /**
755          * @param url not null
756          * @return return an array of byte
757          * @throws SAXException if any
758          */
759         private static byte[] toByteArray( URL url )
760             throws SAXException
761         {
762             InputStream is = null;
763             try
764             {
765                 is = url.openStream();
766                 if ( is == null )
767                 {
768                     throw new SAXException( "Cannot open stream from the url: " + url );
769                 }
770                 return IOUtil.toByteArray( is );
771             }
772             catch ( IOException e )
773             {
774                 throw new SAXException( e );
775             }
776             finally
777             {
778                 IOUtil.close( is );
779             }
780         }
781 
782         /**
783          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
784          *
785          * @param res not null array of byte
786          * @param f the file where to write the bytes
787          * @throws SAXException if any
788          * @see IOUtil#copy(byte[], OutputStream)
789          */
790         private void copy( byte[] res, File f )
791             throws SAXException
792         {
793             if ( f.isDirectory() )
794             {
795                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
796             }
797 
798             OutputStream os = null;
799             try
800             {
801                 os = new FileOutputStream( f );
802                 IOUtil.copy( res, os );
803             }
804             catch ( IOException e )
805             {
806                 throw new SAXException( e );
807             }
808             finally
809             {
810                 IOUtil.close( os );
811             }
812         }
813     }
814 }