1 package org.apache.maven.doxia.parser;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.io.BufferedReader;
23 import java.io.ByteArrayInputStream;
24 import java.io.File;
25 import java.io.FileOutputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.OutputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31 import java.net.URL;
32 import java.util.Hashtable;
33 import java.util.LinkedHashMap;
34 import java.util.Locale;
35 import java.util.Map;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39 import org.apache.http.HttpEntity;
40 import org.apache.http.HttpResponse;
41 import org.apache.http.HttpStatus;
42 import org.apache.http.client.ClientProtocolException;
43 import org.apache.http.client.HttpRequestRetryHandler;
44 import org.apache.http.client.methods.HttpGet;
45 import org.apache.http.impl.client.DefaultHttpClient;
46 import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47 import org.apache.http.util.EntityUtils;
48
49 import org.apache.maven.doxia.macro.MacroExecutionException;
50 import org.apache.maven.doxia.markup.XmlMarkup;
51 import org.apache.maven.doxia.sink.Sink;
52 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
53 import org.apache.maven.doxia.util.HtmlTools;
54 import org.apache.maven.doxia.util.XmlValidator;
55
56 import org.codehaus.plexus.util.FileUtils;
57 import org.codehaus.plexus.util.IOUtil;
58 import org.codehaus.plexus.util.StringUtils;
59 import org.codehaus.plexus.util.xml.pull.MXParser;
60 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62
63 import org.xml.sax.EntityResolver;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.SAXException;
66
67 /**
68 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
69 *
70 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
71 * @version $Id: AbstractXmlParser.java 1185112 2011-10-17 11:33:00Z ltheussl $
72 * @since 1.0
73 */
74 public abstract class AbstractXmlParser
75 extends AbstractParser
76 implements XmlMarkup
77 {
78 /**
79 * Entity pattern for HTML entity, i.e. &nbsp;
80 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
81 * <br/>
82 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
83 */
84 private static final Pattern PATTERN_ENTITY_1 =
85 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86
87 /**
88 * Entity pattern for Unicode entity, i.e. &#38;
89 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
90 * <br/>
91 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
92 */
93 private static final Pattern PATTERN_ENTITY_2 =
94 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95
96 private boolean ignorableWhitespace;
97
98 private boolean collapsibleWhitespace;
99
100 private boolean trimmableWhitespace;
101
102 private Map<String, String> entities;
103
104 private boolean validate = false;
105
106 /** {@inheritDoc} */
107 public void parse( Reader source, Sink sink )
108 throws ParseException
109 {
110 init();
111
112 Reader src = source;
113
114 // 1 first parsing if validation is required
115 if ( isValidate() )
116 {
117 String content;
118 try
119 {
120 content = IOUtil.toString( new BufferedReader( src ) );
121 }
122 catch ( IOException e )
123 {
124 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
125 }
126
127 new XmlValidator( getLog() ).validate( content );
128
129 src = new StringReader( content );
130 }
131
132 // 2 second parsing to process
133 try
134 {
135 XmlPullParser parser = new MXParser();
136
137 parser.setInput( src );
138
139 sink.enableLogging( getLog() );
140
141 parseXml( parser, sink );
142 }
143 catch ( XmlPullParserException ex )
144 {
145 throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
146 ex.getColumnNumber() );
147 }
148 catch ( MacroExecutionException ex )
149 {
150 throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
151 }
152
153 setSecondParsing( false );
154 init();
155 }
156
157 /**
158 * {@inheritDoc}
159 *
160 * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
161 */
162 @Override
163 public void parse( String string, Sink sink )
164 throws ParseException
165 {
166 super.parse( string, sink );
167 }
168
169 /** {@inheritDoc} */
170 @Override
171 public final int getType()
172 {
173 return XML_TYPE;
174 }
175
176 /**
177 * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
178 *
179 * @param parser A parser, not null.
180 * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
181 * @since 1.1
182 */
183 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
184 {
185 int count = parser.getAttributeCount();
186
187 if ( count < 0 )
188 {
189 return null;
190 }
191
192 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
193
194 for ( int i = 0; i < count; i++ )
195 {
196 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
197 }
198
199 return atts;
200 }
201
202 /**
203 * Parse the model from the XmlPullParser into the given sink.
204 *
205 * @param parser A parser, not null.
206 * @param sink the sink to receive the events.
207 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
208 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
209 */
210 private void parseXml( XmlPullParser parser, Sink sink )
211 throws XmlPullParserException, MacroExecutionException
212 {
213 int eventType = parser.getEventType();
214
215 while ( eventType != XmlPullParser.END_DOCUMENT )
216 {
217 if ( eventType == XmlPullParser.START_TAG )
218 {
219 handleStartTag( parser, sink );
220 }
221 else if ( eventType == XmlPullParser.END_TAG )
222 {
223 handleEndTag( parser, sink );
224 }
225 else if ( eventType == XmlPullParser.TEXT )
226 {
227 String text = getText( parser );
228
229 if ( isIgnorableWhitespace() )
230 {
231 if ( text.trim().length() != 0 )
232 {
233 handleText( parser, sink );
234 }
235 }
236 else
237 {
238 handleText( parser, sink );
239 }
240 }
241 else if ( eventType == XmlPullParser.CDSECT )
242 {
243 handleCdsect( parser, sink );
244 }
245 else if ( eventType == XmlPullParser.COMMENT )
246 {
247 handleComment( parser, sink );
248 }
249 else if ( eventType == XmlPullParser.ENTITY_REF )
250 {
251 handleEntity( parser, sink );
252 }
253 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
254 {
255 // nop
256 }
257 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
258 {
259 // nop
260 }
261 else if ( eventType == XmlPullParser.DOCDECL )
262 {
263 addLocalEntities( parser, parser.getText() );
264
265 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
266 {
267 addDTDEntities( parser, new String( res ) );
268 }
269 }
270
271 try
272 {
273 eventType = parser.nextToken();
274 }
275 catch ( IOException io )
276 {
277 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
278 }
279 }
280 }
281
282 /**
283 * Goes through the possible start tags.
284 *
285 * @param parser A parser, not null.
286 * @param sink the sink to receive the events.
287 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
288 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
289 */
290 protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
291 throws XmlPullParserException, MacroExecutionException;
292
293 /**
294 * Goes through the possible end tags.
295 *
296 * @param parser A parser, not null.
297 * @param sink the sink to receive the events.
298 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
299 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
300 */
301 protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
302 throws XmlPullParserException, MacroExecutionException;
303
304 /**
305 * Handles text events.
306 *
307 * <p>This is a default implementation, if the parser points to a non-empty text element,
308 * it is emitted as a text event into the specified sink.</p>
309 *
310 * @param parser A parser, not null.
311 * @param sink the sink to receive the events. Not null.
312 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
313 */
314 protected void handleText( XmlPullParser parser, Sink sink )
315 throws XmlPullParserException
316 {
317 String text = getText( parser );
318
319 /*
320 * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
321 * parser so any whitespace that makes it here is significant.
322 */
323 if ( StringUtils.isNotEmpty( text ) )
324 {
325 sink.text( text );
326 }
327 }
328
329 /**
330 * Handles CDATA sections.
331 *
332 * <p>This is a default implementation, all data are emitted as text
333 * events into the specified sink.</p>
334 *
335 * @param parser A parser, not null.
336 * @param sink the sink to receive the events. Not null.
337 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
338 */
339 protected void handleCdsect( XmlPullParser parser, Sink sink )
340 throws XmlPullParserException
341 {
342 sink.text( getText( parser ) );
343 }
344
345 /**
346 * Handles comments.
347 *
348 * <p>This is a default implementation, all data are emitted as comment
349 * events into the specified sink.</p>
350 *
351 * @param parser A parser, not null.
352 * @param sink the sink to receive the events. Not null.
353 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354 */
355 protected void handleComment( XmlPullParser parser, Sink sink )
356 throws XmlPullParserException
357 {
358 sink.comment( getText( parser ).trim() );
359 }
360
361 /**
362 * Handles entities.
363 *
364 * <p>This is a default implementation, all entities are resolved and emitted as text
365 * events into the specified sink, except:</p>
366 * <ul>
367 * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
368 * are emitted as <code>nonBreakingSpace()</code> events.</li>
369 * </ul>
370 *
371 * @param parser A parser, not null.
372 * @param sink the sink to receive the events. Not null.
373 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
374 */
375 protected void handleEntity( XmlPullParser parser, Sink sink )
376 throws XmlPullParserException
377 {
378 String text = getText( parser );
379
380 String name = parser.getName();
381
382 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
383 {
384 sink.nonBreakingSpace();
385 }
386 else
387 {
388 String unescaped = HtmlTools.unescapeHTML( text );
389
390 sink.text( unescaped );
391 }
392 }
393
394 /**
395 * Handles an unkown event.
396 *
397 * <p>This is a default implementation, all events are emitted as unknown
398 * events into the specified sink.</p>
399 *
400 * @param parser the parser to get the event from.
401 * @param sink the sink to receive the event.
402 * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
403 * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
404 * It will be passed as the first argument of the required parameters to the Sink
405 * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
406 * method.
407 */
408 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
409 {
410 Object[] required = new Object[] { new Integer( type ) };
411
412 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
413
414 sink.unknown( parser.getName(), required, attribs );
415 }
416
417 /**
418 * <p>isIgnorableWhitespace.</p>
419 *
420 * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
421 * @see #setIgnorableWhitespace(boolean)
422 * @since 1.1
423 */
424 protected boolean isIgnorableWhitespace()
425 {
426 return ignorableWhitespace;
427 }
428
429 /**
430 * Specify that whitespace will be ignored. I.e.:
431 * <pre><tr> <td/> </tr></pre>
432 * is equivalent to
433 * <pre><tr><td/></tr></pre>
434 *
435 * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
436 * @since 1.1
437 */
438 protected void setIgnorableWhitespace( boolean ignorable )
439 {
440 this.ignorableWhitespace = ignorable;
441 }
442
443 /**
444 * <p>isCollapsibleWhitespace.</p>
445 *
446 * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
447 * @see #setCollapsibleWhitespace(boolean)
448 * @since 1.1
449 */
450 protected boolean isCollapsibleWhitespace()
451 {
452 return collapsibleWhitespace;
453 }
454
455 /**
456 * Specify that text will be collapsed. I.e.:
457 * <pre>Text Text</pre>
458 * is equivalent to
459 * <pre>Text Text</pre>
460 *
461 * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
462 * @since 1.1
463 */
464 protected void setCollapsibleWhitespace( boolean collapsible )
465 {
466 this.collapsibleWhitespace = collapsible;
467 }
468
469 /**
470 * <p>isTrimmableWhitespace.</p>
471 *
472 * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
473 * @see #setTrimmableWhitespace(boolean)
474 * @since 1.1
475 */
476 protected boolean isTrimmableWhitespace()
477 {
478 return trimmableWhitespace;
479 }
480
481 /**
482 * Specify that text will be collapsed. I.e.:
483 * <pre><p> Text </p></pre>
484 * is equivalent to
485 * <pre><p>Text</p></pre>
486 *
487 * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
488 * @since 1.1
489 */
490 protected void setTrimmableWhitespace( boolean trimmable )
491 {
492 this.trimmableWhitespace = trimmable;
493 }
494
495 /**
496 * <p>getText.</p>
497 *
498 * @param parser A parser, not null.
499 * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
500 * @see XmlPullParser#getText()
501 * @see #isCollapsibleWhitespace()
502 * @see #isTrimmableWhitespace()
503 * @since 1.1
504 */
505 protected String getText( XmlPullParser parser )
506 {
507 String text = parser.getText();
508
509 if ( isTrimmableWhitespace() )
510 {
511 text = text.trim();
512 }
513
514 if ( isCollapsibleWhitespace() )
515 {
516 StringBuilder newText = new StringBuilder();
517 String[] elts = StringUtils.split( text, " \r\n" );
518 for ( int i = 0; i < elts.length; i++ )
519 {
520 newText.append( elts[i] );
521 if ( ( i + 1 ) < elts.length )
522 {
523 newText.append( " " );
524 }
525 }
526 text = newText.toString();
527 }
528
529 return text;
530 }
531
532 /**
533 * Return the defined entities in a local doctype. I.e.:
534 * <pre>
535 * <!DOCTYPE foo [
536 * <!ENTITY bar "&#x160;">
537 * <!ENTITY bar1 "&#x161;">
538 * ]>
539 * </pre>
540 *
541 * @return a map of the defined entities in a local doctype.
542 * @since 1.1
543 */
544 protected Map<String, String> getLocalEntities()
545 {
546 if ( entities == null )
547 {
548 entities = new LinkedHashMap<String, String>();
549 }
550
551 return entities;
552 }
553
554 /**
555 * <p>isValidate.</p>
556 *
557 * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
558 * @since 1.1
559 */
560 public boolean isValidate()
561 {
562 return validate;
563 }
564
565 /**
566 * Specify a flag to validate or not the XML content.
567 *
568 * @param validate the validate to set
569 * @see #parse(Reader, Sink)
570 * @since 1.1
571 */
572 public void setValidate( boolean validate )
573 {
574 this.validate = validate;
575 }
576
577 // ----------------------------------------------------------------------
578 // Private methods
579 // ----------------------------------------------------------------------
580
581 /**
582 * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
583 * <br/>
584 * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;.
585 *
586 * @param parser not null
587 * @param entityName not null
588 * @param entityValue not null
589 * @throws XmlPullParserException if any
590 * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
591 */
592 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
593 throws XmlPullParserException
594 {
595 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
596 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
597 {
598 return;
599 }
600
601 parser.defineEntityReplacementText( entityName, entityValue );
602 getLocalEntities().put( entityName, entityValue );
603 }
604
605 /**
606 * Handle entities defined in a local doctype as the following:
607 * <pre>
608 * <!DOCTYPE foo [
609 * <!ENTITY bar "&#x160;">
610 * <!ENTITY bar1 "&#x161;">
611 * ]>
612 * </pre>
613 *
614 * @param parser not null
615 * @param text not null
616 * @throws XmlPullParserException if any
617 */
618 private void addLocalEntities( XmlPullParser parser, String text )
619 throws XmlPullParserException
620 {
621 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
622 if ( entitiesCount > 0 )
623 {
624 // text should be foo [...]
625 int start = text.indexOf( '[');
626 int end = text.lastIndexOf( ']');
627 if ( start != -1 && end != -1 )
628 {
629 addDTDEntities( parser, text.substring( start + 1, end ) );
630 }
631 }
632 }
633
634 /**
635 * Handle entities defined in external doctypes as the following:
636 * <pre>
637 * <!DOCTYPE foo [
638 * <!-- These are the entity sets for ISO Latin 1 characters for the XHTML -->
639 * <!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
640 * "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
641 * %HTMLlat1;
642 * ]>
643 * </pre>
644 *
645 * @param parser not null
646 * @param text not null
647 * @throws XmlPullParserException if any
648 */
649 private void addDTDEntities( XmlPullParser parser, String text )
650 throws XmlPullParserException
651 {
652 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
653 if ( entitiesCount > 0 )
654 {
655 final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
656 BufferedReader reader = new BufferedReader( new StringReader( txt ) );
657 String line;
658 String tmpLine = "";
659 try
660 {
661 Matcher matcher;
662 while ( ( line = reader.readLine() ) != null )
663 {
664 tmpLine += "\n" + line;
665 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
666 if ( matcher.find() && matcher.groupCount() == 7 )
667 {
668 String entityName = matcher.group( 2 );
669 String entityValue = matcher.group( 5 );
670
671 addEntity( parser, entityName, entityValue );
672 tmpLine = "";
673 }
674 else
675 {
676 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
677 if ( matcher.find() && matcher.groupCount() == 8 )
678 {
679 String entityName = matcher.group( 2 );
680 String entityValue = matcher.group( 5 );
681
682 addEntity( parser, entityName, entityValue );
683 tmpLine = "";
684 }
685 }
686 }
687 }
688 catch ( IOException e )
689 {
690 // nop
691 }
692 finally
693 {
694 IOUtil.close( reader );
695 }
696 }
697 }
698
699 /**
700 * Implementation of the callback mechanism <code>EntityResolver</code>.
701 * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
702 */
703 public static class CachedFileEntityResolver
704 implements EntityResolver
705 {
706 /** Map with systemId as key and the content of systemId as byte[]. */
707 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
708
709 /** {@inheritDoc} */
710 public InputSource resolveEntity( String publicId, String systemId )
711 throws SAXException, IOException
712 {
713 byte[] res = ENTITY_CACHE.get( systemId );
714 // already cached?
715 if ( res == null )
716 {
717 String systemName = FileUtils.getFile( systemId ).getName();
718 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
719 // maybe already as a temp file?
720 if ( !temp.exists() )
721 {
722 // is systemId a file or an url?
723 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
724 {
725 // Doxia XSDs are included in the jars, so try to find the resource systemName from
726 // the classpath...
727 String resource = "/" + systemName;
728 URL url = getClass().getResource( resource );
729 if ( url != null )
730 {
731 res = toByteArray( url );
732 }
733 else
734 {
735 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
736 + " because '" + resource + "' is not available of the classpath." );
737 }
738 }
739 else
740 {
741 res = toByteArray( new URL( systemId ) );
742 }
743
744 // write systemId as temp file
745 copy( res, temp );
746 }
747 else
748 {
749 // TODO How to refresh Doxia XSDs from temp dir?
750 res = toByteArray( temp.toURI().toURL() );
751 }
752
753 ENTITY_CACHE.put( systemId, res );
754 }
755
756 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
757 is.setPublicId( publicId );
758 is.setSystemId( systemId );
759
760 return is;
761 }
762
763 /**
764 * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
765 * content.
766 * Otherwise, use HttpClient to get the http content.
767 * Wrap all internal exceptions to throw SAXException.
768 *
769 * @param url not null
770 * @return return an array of byte
771 * @throws SAXException if any
772 */
773 private static byte[] toByteArray( URL url )
774 throws SAXException
775 {
776 if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
777 {
778 InputStream is = null;
779 try
780 {
781 is = url.openStream();
782 if ( is == null )
783 {
784 throw new SAXException( "Cannot open stream from the url: " + url.toString() );
785 }
786 return IOUtil.toByteArray( is );
787 }
788 catch ( IOException e )
789 {
790 throw new SAXException( "IOException: " + e.getMessage(), e );
791 }
792 finally
793 {
794 IOUtil.close( is );
795 }
796 }
797
798 // it is an HTTP url, using HttpClient...
799 DefaultHttpClient client = new DefaultHttpClient();
800 HttpGet method = new HttpGet( url.toString() );
801 // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
802 // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
803 method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
804
805 HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
806 client.setHttpRequestRetryHandler( retryHandler );
807
808 HttpEntity entity = null;
809 try
810 {
811 HttpResponse response = client.execute( method );
812 int statusCode = response.getStatusLine().getStatusCode();
813 if ( statusCode != HttpStatus.SC_OK )
814 {
815 throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
816 + statusCode + ", which is not allowed. The server gave this reason for the failure '"
817 + response.getStatusLine().getReasonPhrase() + "'." );
818 }
819
820 entity = response.getEntity();
821 return EntityUtils.toByteArray( entity );
822 }
823 catch ( ClientProtocolException e )
824 {
825 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
826 }
827 catch ( IOException e )
828 {
829 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
830 }
831 finally
832 {
833 if ( entity != null )
834 {
835 try
836 {
837 entity.consumeContent();
838 }
839 catch ( IOException e )
840 {
841 // Ignore
842 }
843 }
844 }
845 }
846
847 /**
848 * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
849 *
850 * @param res not null array of byte
851 * @param f the file where to write the bytes
852 * @throws SAXException if any
853 * @see {@link IOUtil#copy(byte[], OutputStream)}
854 */
855 private void copy( byte[] res, File f )
856 throws SAXException
857 {
858 if ( f.isDirectory() )
859 {
860 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
861 }
862
863 OutputStream os = null;
864 try
865 {
866 os = new FileOutputStream( f );
867 IOUtil.copy( res, os );
868 }
869 catch ( IOException e )
870 {
871 throw new SAXException( "IOException: " + e.getMessage(), e );
872 }
873 finally
874 {
875 IOUtil.close( os );
876 }
877 }
878 }
879 }