1 package org.apache.maven.doxia.parser;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.io.BufferedReader;
23 import java.io.ByteArrayInputStream;
24 import java.io.File;
25 import java.io.FileOutputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.OutputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31 import java.net.URL;
32 import java.util.Hashtable;
33 import java.util.LinkedHashMap;
34 import java.util.Locale;
35 import java.util.Map;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39 import org.apache.http.HttpEntity;
40 import org.apache.http.HttpResponse;
41 import org.apache.http.HttpStatus;
42 import org.apache.http.client.ClientProtocolException;
43 import org.apache.http.client.HttpRequestRetryHandler;
44 import org.apache.http.client.methods.HttpGet;
45 import org.apache.http.impl.client.DefaultHttpClient;
46 import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47 import org.apache.http.util.EntityUtils;
48
49 import org.apache.maven.doxia.macro.MacroExecutionException;
50 import org.apache.maven.doxia.markup.XmlMarkup;
51 import org.apache.maven.doxia.sink.Sink;
52 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
53 import org.apache.maven.doxia.util.HtmlTools;
54 import org.apache.maven.doxia.util.XmlValidator;
55
56 import org.codehaus.plexus.util.FileUtils;
57 import org.codehaus.plexus.util.IOUtil;
58 import org.codehaus.plexus.util.StringUtils;
59 import org.codehaus.plexus.util.xml.pull.MXParser;
60 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62
63 import org.xml.sax.EntityResolver;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.SAXException;
66
67 /**
68 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
69 *
70 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
71 * @version $Id: AbstractXmlParser.java 1465336 2013-04-07 07:39:00Z hboutemy $
72 * @since 1.0
73 */
74 public abstract class AbstractXmlParser
75 extends AbstractParser
76 implements XmlMarkup
77 {
78 /**
79 * Entity pattern for HTML entity, i.e. &nbsp;
80 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
81 * <br/>
82 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
83 */
84 private static final Pattern PATTERN_ENTITY_1 =
85 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86
87 /**
88 * Entity pattern for Unicode entity, i.e. &#38;
89 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
90 * <br/>
91 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
92 */
93 private static final Pattern PATTERN_ENTITY_2 =
94 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95
96 private boolean ignorableWhitespace;
97
98 private boolean collapsibleWhitespace;
99
100 private boolean trimmableWhitespace;
101
102 private Map<String, String> entities;
103
104 private boolean validate = false;
105
106 /** {@inheritDoc} */
107 public void parse( Reader source, Sink sink )
108 throws ParseException
109 {
110 init();
111
112 Reader src = source;
113
114 // 1 first parsing if validation is required
115 if ( isValidate() )
116 {
117 String content;
118 try
119 {
120 content = IOUtil.toString( new BufferedReader( src ) );
121 }
122 catch ( IOException e )
123 {
124 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
125 }
126
127 new XmlValidator( getLog() ).validate( content );
128
129 src = new StringReader( content );
130 }
131
132 // 2 second parsing to process
133 try
134 {
135 XmlPullParser parser = new MXParser();
136
137 parser.setInput( src );
138
139 // allow parser initialization, e.g. for additional entities in XHTML
140 // Note: do it after input is set, otherwise values are reset
141 initXmlParser( parser );
142
143 sink.enableLogging( getLog() );
144
145 parseXml( parser, sink );
146 }
147 catch ( XmlPullParserException ex )
148 {
149 throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
150 ex.getColumnNumber() );
151 }
152 catch ( MacroExecutionException ex )
153 {
154 throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
155 }
156
157 setSecondParsing( false );
158 init();
159 }
160
161 /**
162 * Initializes the parser with custom entities or other options.
163 *
164 * @param parser A parser, not null.
165 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
166 */
167 protected void initXmlParser( XmlPullParser parser )
168 throws XmlPullParserException
169 {
170 // nop
171 }
172
173 /**
174 * {@inheritDoc}
175 *
176 * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
177 */
178 @Override
179 public void parse( String string, Sink sink )
180 throws ParseException
181 {
182 super.parse( string, sink );
183 }
184
185 /** {@inheritDoc} */
186 @Override
187 public final int getType()
188 {
189 return XML_TYPE;
190 }
191
192 /**
193 * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
194 *
195 * @param parser A parser, not null.
196 * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
197 * @since 1.1
198 */
199 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
200 {
201 int count = parser.getAttributeCount();
202
203 if ( count < 0 )
204 {
205 return null;
206 }
207
208 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
209
210 for ( int i = 0; i < count; i++ )
211 {
212 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
213 }
214
215 return atts;
216 }
217
218 /**
219 * Parse the model from the XmlPullParser into the given sink.
220 *
221 * @param parser A parser, not null.
222 * @param sink the sink to receive the events.
223 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
224 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
225 */
226 private void parseXml( XmlPullParser parser, Sink sink )
227 throws XmlPullParserException, MacroExecutionException
228 {
229 int eventType = parser.getEventType();
230
231 while ( eventType != XmlPullParser.END_DOCUMENT )
232 {
233 if ( eventType == XmlPullParser.START_TAG )
234 {
235 handleStartTag( parser, sink );
236 }
237 else if ( eventType == XmlPullParser.END_TAG )
238 {
239 handleEndTag( parser, sink );
240 }
241 else if ( eventType == XmlPullParser.TEXT )
242 {
243 String text = getText( parser );
244
245 if ( isIgnorableWhitespace() )
246 {
247 if ( text.trim().length() != 0 )
248 {
249 handleText( parser, sink );
250 }
251 }
252 else
253 {
254 handleText( parser, sink );
255 }
256 }
257 else if ( eventType == XmlPullParser.CDSECT )
258 {
259 handleCdsect( parser, sink );
260 }
261 else if ( eventType == XmlPullParser.COMMENT )
262 {
263 handleComment( parser, sink );
264 }
265 else if ( eventType == XmlPullParser.ENTITY_REF )
266 {
267 handleEntity( parser, sink );
268 }
269 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
270 {
271 // nop
272 }
273 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
274 {
275 // nop
276 }
277 else if ( eventType == XmlPullParser.DOCDECL )
278 {
279 addLocalEntities( parser, parser.getText() );
280
281 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
282 {
283 addDTDEntities( parser, new String( res ) );
284 }
285 }
286
287 try
288 {
289 eventType = parser.nextToken();
290 }
291 catch ( IOException io )
292 {
293 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
294 }
295 }
296 }
297
298 /**
299 * Goes through the possible start tags.
300 *
301 * @param parser A parser, not null.
302 * @param sink the sink to receive the events.
303 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
304 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
305 */
306 protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
307 throws XmlPullParserException, MacroExecutionException;
308
309 /**
310 * Goes through the possible end tags.
311 *
312 * @param parser A parser, not null.
313 * @param sink the sink to receive the events.
314 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
315 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
316 */
317 protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
318 throws XmlPullParserException, MacroExecutionException;
319
320 /**
321 * Handles text events.
322 *
323 * <p>This is a default implementation, if the parser points to a non-empty text element,
324 * it is emitted as a text event into the specified sink.</p>
325 *
326 * @param parser A parser, not null.
327 * @param sink the sink to receive the events. Not null.
328 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
329 */
330 protected void handleText( XmlPullParser parser, Sink sink )
331 throws XmlPullParserException
332 {
333 String text = getText( parser );
334
335 /*
336 * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
337 * parser so any whitespace that makes it here is significant.
338 */
339 if ( StringUtils.isNotEmpty( text ) )
340 {
341 sink.text( text );
342 }
343 }
344
345 /**
346 * Handles CDATA sections.
347 *
348 * <p>This is a default implementation, all data are emitted as text
349 * events into the specified sink.</p>
350 *
351 * @param parser A parser, not null.
352 * @param sink the sink to receive the events. Not null.
353 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354 */
355 protected void handleCdsect( XmlPullParser parser, Sink sink )
356 throws XmlPullParserException
357 {
358 sink.text( getText( parser ) );
359 }
360
361 /**
362 * Handles comments.
363 *
364 * <p>This is a default implementation, all data are emitted as comment
365 * events into the specified sink.</p>
366 *
367 * @param parser A parser, not null.
368 * @param sink the sink to receive the events. Not null.
369 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
370 */
371 protected void handleComment( XmlPullParser parser, Sink sink )
372 throws XmlPullParserException
373 {
374 sink.comment( getText( parser ).trim() );
375 }
376
377 /**
378 * Handles entities.
379 *
380 * <p>This is a default implementation, all entities are resolved and emitted as text
381 * events into the specified sink, except:</p>
382 * <ul>
383 * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
384 * are emitted as <code>nonBreakingSpace()</code> events.</li>
385 * </ul>
386 *
387 * @param parser A parser, not null.
388 * @param sink the sink to receive the events. Not null.
389 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
390 */
391 protected void handleEntity( XmlPullParser parser, Sink sink )
392 throws XmlPullParserException
393 {
394 String text = getText( parser );
395
396 String name = parser.getName();
397
398 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
399 {
400 sink.nonBreakingSpace();
401 }
402 else
403 {
404 String unescaped = HtmlTools.unescapeHTML( text );
405
406 sink.text( unescaped );
407 }
408 }
409
410 /**
411 * Handles an unknown event.
412 *
413 * <p>This is a default implementation, all events are emitted as unknown
414 * events into the specified sink.</p>
415 *
416 * @param parser the parser to get the event from.
417 * @param sink the sink to receive the event.
418 * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
419 * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
420 * It will be passed as the first argument of the required parameters to the Sink
421 * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
422 * method.
423 */
424 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
425 {
426 Object[] required = new Object[] { Integer.valueOf( type ) };
427
428 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
429
430 sink.unknown( parser.getName(), required, attribs );
431 }
432
433 /**
434 * <p>isIgnorableWhitespace.</p>
435 *
436 * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
437 * @see #setIgnorableWhitespace(boolean)
438 * @since 1.1
439 */
440 protected boolean isIgnorableWhitespace()
441 {
442 return ignorableWhitespace;
443 }
444
445 /**
446 * Specify that whitespace will be ignored. I.e.:
447 * <pre><tr> <td/> </tr></pre>
448 * is equivalent to
449 * <pre><tr><td/></tr></pre>
450 *
451 * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
452 * @since 1.1
453 */
454 protected void setIgnorableWhitespace( boolean ignorable )
455 {
456 this.ignorableWhitespace = ignorable;
457 }
458
459 /**
460 * <p>isCollapsibleWhitespace.</p>
461 *
462 * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
463 * @see #setCollapsibleWhitespace(boolean)
464 * @since 1.1
465 */
466 protected boolean isCollapsibleWhitespace()
467 {
468 return collapsibleWhitespace;
469 }
470
471 /**
472 * Specify that text will be collapsed. I.e.:
473 * <pre>Text Text</pre>
474 * is equivalent to
475 * <pre>Text Text</pre>
476 *
477 * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
478 * @since 1.1
479 */
480 protected void setCollapsibleWhitespace( boolean collapsible )
481 {
482 this.collapsibleWhitespace = collapsible;
483 }
484
485 /**
486 * <p>isTrimmableWhitespace.</p>
487 *
488 * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
489 * @see #setTrimmableWhitespace(boolean)
490 * @since 1.1
491 */
492 protected boolean isTrimmableWhitespace()
493 {
494 return trimmableWhitespace;
495 }
496
497 /**
498 * Specify that text will be collapsed. I.e.:
499 * <pre><p> Text </p></pre>
500 * is equivalent to
501 * <pre><p>Text</p></pre>
502 *
503 * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
504 * @since 1.1
505 */
506 protected void setTrimmableWhitespace( boolean trimmable )
507 {
508 this.trimmableWhitespace = trimmable;
509 }
510
511 /**
512 * <p>getText.</p>
513 *
514 * @param parser A parser, not null.
515 * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
516 * @see XmlPullParser#getText()
517 * @see #isCollapsibleWhitespace()
518 * @see #isTrimmableWhitespace()
519 * @since 1.1
520 */
521 protected String getText( XmlPullParser parser )
522 {
523 String text = parser.getText();
524
525 if ( isTrimmableWhitespace() )
526 {
527 text = text.trim();
528 }
529
530 if ( isCollapsibleWhitespace() )
531 {
532 StringBuilder newText = new StringBuilder();
533 String[] elts = StringUtils.split( text, " \r\n" );
534 for ( int i = 0; i < elts.length; i++ )
535 {
536 newText.append( elts[i] );
537 if ( ( i + 1 ) < elts.length )
538 {
539 newText.append( " " );
540 }
541 }
542 text = newText.toString();
543 }
544
545 return text;
546 }
547
548 /**
549 * Return the defined entities in a local doctype. I.e.:
550 * <pre>
551 * <!DOCTYPE foo [
552 * <!ENTITY bar "&#x160;">
553 * <!ENTITY bar1 "&#x161;">
554 * ]>
555 * </pre>
556 *
557 * @return a map of the defined entities in a local doctype.
558 * @since 1.1
559 */
560 protected Map<String, String> getLocalEntities()
561 {
562 if ( entities == null )
563 {
564 entities = new LinkedHashMap<String, String>();
565 }
566
567 return entities;
568 }
569
570 /**
571 * <p>isValidate.</p>
572 *
573 * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
574 * @since 1.1
575 */
576 public boolean isValidate()
577 {
578 return validate;
579 }
580
581 /**
582 * Specify a flag to validate or not the XML content.
583 *
584 * @param validate the validate to set
585 * @see #parse(Reader, Sink)
586 * @since 1.1
587 */
588 public void setValidate( boolean validate )
589 {
590 this.validate = validate;
591 }
592
593 // ----------------------------------------------------------------------
594 // Private methods
595 // ----------------------------------------------------------------------
596
597 /**
598 * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
599 * <br/>
600 * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;.
601 *
602 * @param parser not null
603 * @param entityName not null
604 * @param entityValue not null
605 * @throws XmlPullParserException if any
606 * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
607 */
608 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
609 throws XmlPullParserException
610 {
611 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
612 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
613 {
614 return;
615 }
616
617 parser.defineEntityReplacementText( entityName, entityValue );
618 getLocalEntities().put( entityName, entityValue );
619 }
620
621 /**
622 * Handle entities defined in a local doctype as the following:
623 * <pre>
624 * <!DOCTYPE foo [
625 * <!ENTITY bar "&#x160;">
626 * <!ENTITY bar1 "&#x161;">
627 * ]>
628 * </pre>
629 *
630 * @param parser not null
631 * @param text not null
632 * @throws XmlPullParserException if any
633 */
634 private void addLocalEntities( XmlPullParser parser, String text )
635 throws XmlPullParserException
636 {
637 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
638 if ( entitiesCount > 0 )
639 {
640 // text should be foo [...]
641 int start = text.indexOf( '[' );
642 int end = text.lastIndexOf( ']' );
643 if ( start != -1 && end != -1 )
644 {
645 addDTDEntities( parser, text.substring( start + 1, end ) );
646 }
647 }
648 }
649
650 /**
651 * Handle entities defined in external doctypes as the following:
652 * <pre>
653 * <!DOCTYPE foo [
654 * <!-- These are the entity sets for ISO Latin 1 characters for the XHTML -->
655 * <!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
656 * "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
657 * %HTMLlat1;
658 * ]>
659 * </pre>
660 *
661 * @param parser not null
662 * @param text not null
663 * @throws XmlPullParserException if any
664 */
665 private void addDTDEntities( XmlPullParser parser, String text )
666 throws XmlPullParserException
667 {
668 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
669 if ( entitiesCount > 0 )
670 {
671 final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
672 BufferedReader reader = new BufferedReader( new StringReader( txt ) );
673 String line;
674 String tmpLine = "";
675 try
676 {
677 Matcher matcher;
678 while ( ( line = reader.readLine() ) != null )
679 {
680 tmpLine += "\n" + line;
681 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
682 if ( matcher.find() && matcher.groupCount() == 7 )
683 {
684 String entityName = matcher.group( 2 );
685 String entityValue = matcher.group( 5 );
686
687 addEntity( parser, entityName, entityValue );
688 tmpLine = "";
689 }
690 else
691 {
692 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
693 if ( matcher.find() && matcher.groupCount() == 8 )
694 {
695 String entityName = matcher.group( 2 );
696 String entityValue = matcher.group( 5 );
697
698 addEntity( parser, entityName, entityValue );
699 tmpLine = "";
700 }
701 }
702 }
703 }
704 catch ( IOException e )
705 {
706 // nop
707 }
708 finally
709 {
710 IOUtil.close( reader );
711 }
712 }
713 }
714
715 /**
716 * Implementation of the callback mechanism <code>EntityResolver</code>.
717 * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
718 */
719 public static class CachedFileEntityResolver
720 implements EntityResolver
721 {
722 /** Map with systemId as key and the content of systemId as byte[]. */
723 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
724
725 /** {@inheritDoc} */
726 public InputSource resolveEntity( String publicId, String systemId )
727 throws SAXException, IOException
728 {
729 byte[] res = ENTITY_CACHE.get( systemId );
730 // already cached?
731 if ( res == null )
732 {
733 String systemName = FileUtils.getFile( systemId ).getName();
734 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
735 // maybe already as a temp file?
736 if ( !temp.exists() )
737 {
738 // is systemId a file or an url?
739 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
740 {
741 // Doxia XSDs are included in the jars, so try to find the resource systemName from
742 // the classpath...
743 String resource = "/" + systemName;
744 URL url = getClass().getResource( resource );
745 if ( url != null )
746 {
747 res = toByteArray( url );
748 }
749 else
750 {
751 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
752 + " because '" + resource + "' is not available of the classpath." );
753 }
754 }
755 else
756 {
757 res = toByteArray( new URL( systemId ) );
758 }
759
760 // write systemId as temp file
761 copy( res, temp );
762 }
763 else
764 {
765 // TODO How to refresh Doxia XSDs from temp dir?
766 res = toByteArray( temp.toURI().toURL() );
767 }
768
769 ENTITY_CACHE.put( systemId, res );
770 }
771
772 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
773 is.setPublicId( publicId );
774 is.setSystemId( systemId );
775
776 return is;
777 }
778
779 /**
780 * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
781 * content.
782 * Otherwise, use HttpClient to get the http content.
783 * Wrap all internal exceptions to throw SAXException.
784 *
785 * @param url not null
786 * @return return an array of byte
787 * @throws SAXException if any
788 */
789 private static byte[] toByteArray( URL url )
790 throws SAXException
791 {
792 if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
793 {
794 InputStream is = null;
795 try
796 {
797 is = url.openStream();
798 if ( is == null )
799 {
800 throw new SAXException( "Cannot open stream from the url: " + url.toString() );
801 }
802 return IOUtil.toByteArray( is );
803 }
804 catch ( IOException e )
805 {
806 throw new SAXException( "IOException: " + e.getMessage(), e );
807 }
808 finally
809 {
810 IOUtil.close( is );
811 }
812 }
813
814 // it is an HTTP url, using HttpClient...
815 DefaultHttpClient client = new DefaultHttpClient();
816 HttpGet method = new HttpGet( url.toString() );
817 // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
818 // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
819 method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
820
821 HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
822 client.setHttpRequestRetryHandler( retryHandler );
823
824 HttpEntity entity = null;
825 try
826 {
827 HttpResponse response = client.execute( method );
828 int statusCode = response.getStatusLine().getStatusCode();
829 if ( statusCode != HttpStatus.SC_OK )
830 {
831 throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
832 + statusCode + ", which is not allowed. The server gave this reason for the failure '"
833 + response.getStatusLine().getReasonPhrase() + "'." );
834 }
835
836 entity = response.getEntity();
837 return EntityUtils.toByteArray( entity );
838 }
839 catch ( ClientProtocolException e )
840 {
841 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
842 }
843 catch ( IOException e )
844 {
845 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
846 }
847 finally
848 {
849 if ( entity != null )
850 {
851 try
852 {
853 entity.consumeContent();
854 }
855 catch ( IOException e )
856 {
857 // Ignore
858 }
859 }
860 }
861 }
862
863 /**
864 * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
865 *
866 * @param res not null array of byte
867 * @param f the file where to write the bytes
868 * @throws SAXException if any
869 * @see {@link IOUtil#copy(byte[], OutputStream)}
870 */
871 private void copy( byte[] res, File f )
872 throws SAXException
873 {
874 if ( f.isDirectory() )
875 {
876 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
877 }
878
879 OutputStream os = null;
880 try
881 {
882 os = new FileOutputStream( f );
883 IOUtil.copy( res, os );
884 }
885 catch ( IOException e )
886 {
887 throw new SAXException( "IOException: " + e.getMessage(), e );
888 }
889 finally
890 {
891 IOUtil.close( os );
892 }
893 }
894 }
895 }