package org.apache.maven.doxia.parser;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.util.EntityUtils;

import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;

import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
 *
 * <p>The class drives an {@link XmlPullParser} over the input, dispatches every pull event to an overridable
 * <code>handleXxx</code> method, and registers the entities declared in the document type declaration with the
 * pull parser so that entity references can be resolved.</p>
 *
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 * @version $Id: AbstractXmlParser.CachedFileEntityResolver.html 979316 2016-02-02 21:51:43Z hboutemy $
 * @since 1.0
 */
public abstract class AbstractXmlParser
    extends AbstractParser
    implements XmlMarkup
{
    /**
     * Entity declaration whose replacement text is a named HTML entity, i.e. <code>&amp;nbsp;</code>.
     * Group 2 is the entity name, group 5 the replacement text.
     * <br/>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_1 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );

    /**
     * Entity declaration whose replacement text is a numeric character reference, i.e. <code>&amp;#38;</code>.
     * Group 2 is the entity name, group 5 the replacement text.
     * <br/>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_2 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );

    /** Index of the capturing group holding the entity name in both entity patterns. */
    private static final int GROUP_ENTITY_NAME = 2;

    /** Index of the capturing group holding the entity replacement text in both entity patterns. */
    private static final int GROUP_ENTITY_VALUE = 5;

    private boolean ignorableWhitespace;

    private boolean collapsibleWhitespace;

    private boolean trimmableWhitespace;

    private Map<String, String> entities;

    private boolean validate = false;

    /** {@inheritDoc} */
    public void parse( Reader source, Sink sink )
        throws ParseException
    {
        init();

        Reader src = source;

        // 1 first parsing if validation is required
        if ( isValidate() )
        {
            String content;
            try
            {
                content = IOUtil.toString( new BufferedReader( src ) );
            }
            catch ( IOException e )
            {
                throw new ParseException( "Error reading the model: " + e.getMessage(), e );
            }

            new XmlValidator( getLog() ).validate( content );

            // the original reader is consumed, continue with the buffered content
            src = new StringReader( content );
        }

        // 2 second parsing to process
        try
        {
            XmlPullParser parser = new MXParser();

            parser.setInput( src );

            // allow parser initialization, e.g. for additional entities in XHTML
            // Note: do it after input is set, otherwise values are reset
            initXmlParser( parser );

            sink.enableLogging( getLog() );

            parseXml( parser, sink );
        }
        catch ( XmlPullParserException ex )
        {
            throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
                                      ex.getColumnNumber() );
        }
        catch ( MacroExecutionException ex )
        {
            throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
        }
        finally
        {
            // Always reset the parser state, even when parsing failed, so that a reused
            // parser instance does not start its next run with stale state.
            setSecondParsing( false );
            init();
        }
    }

    /**
     * Initializes the parser with custom entities or other options.
     * The default implementation does nothing.
     *
     * @param parser A parser, not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
     */
    protected void initXmlParser( XmlPullParser parser )
        throws XmlPullParserException
    {
        // nop
    }

    /**
     * {@inheritDoc}
     *
     * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
     */
    @Override
    public void parse( String string, Sink sink )
        throws ParseException
    {
        super.parse( string, sink );
    }

    /** {@inheritDoc} */
    @Override
    public final int getType()
    {
        return XML_TYPE;
    }

    /**
     * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
     *
     * @param parser A parser, not null.
     * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
     * @since 1.1
     */
    protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
    {
        int count = parser.getAttributeCount();

        // a negative count is how the parser reports "not on a start tag"
        if ( count < 0 )
        {
            return null;
        }

        SinkEventAttributeSet atts = new SinkEventAttributeSet( count );

        for ( int i = 0; i < count; i++ )
        {
            atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
        }

        return atts;
    }

    /**
     * Parse the model from the XmlPullParser into the given sink.
     *
     * <p>Every token is dispatched to the matching <code>handleXxx</code> method until the end of the document is
     * reached. A document type declaration triggers the registration of its local entities and of the entities
     * found in every file currently held by {@link CachedFileEntityResolver}.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    private void parseXml( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException
    {
        int eventType = parser.getEventType();

        while ( eventType != XmlPullParser.END_DOCUMENT )
        {
            if ( eventType == XmlPullParser.START_TAG )
            {
                handleStartTag( parser, sink );
            }
            else if ( eventType == XmlPullParser.END_TAG )
            {
                handleEndTag( parser, sink );
            }
            else if ( eventType == XmlPullParser.TEXT )
            {
                String text = getText( parser );

                // whitespace-only text is dropped only when whitespace is declared ignorable
                if ( !isIgnorableWhitespace() || text.trim().length() != 0 )
                {
                    handleText( parser, sink );
                }
            }
            else if ( eventType == XmlPullParser.CDSECT )
            {
                handleCdsect( parser, sink );
            }
            else if ( eventType == XmlPullParser.COMMENT )
            {
                handleComment( parser, sink );
            }
            else if ( eventType == XmlPullParser.ENTITY_REF )
            {
                handleEntity( parser, sink );
            }
            else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
            {
                // nop
            }
            else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
            {
                // nop
            }
            else if ( eventType == XmlPullParser.DOCDECL )
            {
                addLocalEntities( parser, parser.getText() );

                // NOTE(review): the cached bytes are decoded with the platform default charset.
                for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
                {
                    addDTDEntities( parser, new String( res ) );
                }
            }

            try
            {
                eventType = parser.nextToken();
            }
            catch ( IOException io )
            {
                throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
            }
        }
    }

    /**
     * Goes through the possible start tags.
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;

    /**
     * Goes through the possible end tags.
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;

    /**
     * Handles text events.
     *
     * <p>This is a default implementation, if the parser points to a non-empty text element,
     * it is emitted as a text event into the specified sink.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleText( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        String text = getText( parser );

        /*
         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
         * parser so any whitespace that makes it here is significant.
         */
        if ( StringUtils.isNotEmpty( text ) )
        {
            sink.text( text );
        }
    }

    /**
     * Handles CDATA sections.
     *
     * <p>This is a default implementation, all data are emitted as text
     * events into the specified sink.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleCdsect( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        sink.text( getText( parser ) );
    }

    /**
     * Handles comments.
     *
     * <p>This is a default implementation, all data are emitted as comment
     * events into the specified sink, provided that comment emission is enabled.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleComment( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        if ( isEmitComments() )
        {
            sink.comment( getText( parser ) );
        }
    }

    /**
     * Handles entities.
     *
     * <p>This is a default implementation, all entities are resolved and emitted as text
     * events into the specified sink, except:</p>
     * <ul>
     * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
     * are emitted as <code>nonBreakingSpace()</code> events.</li>
     * </ul>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleEntity( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        String text = getText( parser );

        String name = parser.getName();

        if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
        {
            sink.nonBreakingSpace();
            return;
        }

        sink.text( HtmlTools.unescapeHTML( text ) );
    }

    /**
     * Handles an unknown event.
     *
     * <p>This is a default implementation, all events are emitted as unknown
     * events into the specified sink.</p>
     *
     * @param parser the parser to get the event from.
     * @param sink the sink to receive the event.
     * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
     * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
     * It will be passed as the first argument of the required parameters to the Sink
     * {@link
     * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
     * method.
     */
    protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
    {
        Object[] required = new Object[] { Integer.valueOf( type ) };

        SinkEventAttributeSet attribs = getAttributesFromParser( parser );

        sink.unknown( parser.getName(), required, attribs );
    }

    /**
     * <p>isIgnorableWhitespace.</p>
     *
     * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
     * @see #setIgnorableWhitespace(boolean)
     * @since 1.1
     */
    protected boolean isIgnorableWhitespace()
    {
        return ignorableWhitespace;
    }

    /**
     * Specify that whitespace will be ignored. I.e.:
     * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
     * is equivalent to
     * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
     *
     * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
     * @since 1.1
     */
    protected void setIgnorableWhitespace( boolean ignorable )
    {
        this.ignorableWhitespace = ignorable;
    }

    /**
     * <p>isCollapsibleWhitespace.</p>
     *
     * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
     * @see #setCollapsibleWhitespace(boolean)
     * @since 1.1
     */
    protected boolean isCollapsibleWhitespace()
    {
        return collapsibleWhitespace;
    }

    /**
     * Specify that text will be collapsed, i.e. that every run of spaces, carriage returns and line feeds
     * between two words is replaced by a single space:
     * <pre>Text   Text</pre>
     * is equivalent to
     * <pre>Text Text</pre>
     *
     * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
     * @since 1.1
     */
    protected void setCollapsibleWhitespace( boolean collapsible )
    {
        this.collapsibleWhitespace = collapsible;
    }

    /**
     * <p>isTrimmableWhitespace.</p>
     *
     * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
     * @see #setTrimmableWhitespace(boolean)
     * @since 1.1
     */
    protected boolean isTrimmableWhitespace()
    {
        return trimmableWhitespace;
    }

    /**
     * Specify that text will be trimmed. I.e.:
     * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
     * is equivalent to
     * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
     *
     * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
     * @since 1.1
     */
    protected void setTrimmableWhitespace( boolean trimmable )
    {
        this.trimmableWhitespace = trimmable;
    }

    /**
     * <p>getText.</p>
     *
     * @param parser A parser, not null.
     * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration;
     * <code>null</code> if the parser has no text for the current event.
     * @see XmlPullParser#getText()
     * @see #isCollapsibleWhitespace()
     * @see #isTrimmableWhitespace()
     * @since 1.1
     */
    protected String getText( XmlPullParser parser )
    {
        String text = parser.getText();

        // The parser may have no text for the current event (e.g. an unresolved entity reference):
        // hand the null back untouched instead of failing while trimming or collapsing it.
        if ( text == null )
        {
            return null;
        }

        if ( isTrimmableWhitespace() )
        {
            text = text.trim();
        }

        if ( isCollapsibleWhitespace() )
        {
            StringBuilder collapsed = new StringBuilder();
            String[] words = StringUtils.split( text, " \r\n" );
            for ( int i = 0; i < words.length; i++ )
            {
                if ( i > 0 )
                {
                    collapsed.append( " " );
                }
                collapsed.append( words[i] );
            }
            text = collapsed.toString();
        }

        return text;
    }

    /**
     * Return the defined entities in a local doctype. I.e.:
     * <pre>
     * &lt;!DOCTYPE foo [
     *   &lt;!ENTITY bar "&amp;#x160;"&gt;
     *   &lt;!ENTITY bar1 "&amp;#x161;"&gt;
     * ]&gt;
     * </pre>
     *
     * @return a map of the defined entities in a local doctype, created on first access and never null.
     * @since 1.1
     */
    protected Map<String, String> getLocalEntities()
    {
        if ( entities == null )
        {
            entities = new LinkedHashMap<String, String>();
        }

        return entities;
    }

    /**
     * <p>isValidate.</p>
     *
     * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
     * @since 1.1
     */
    public boolean isValidate()
    {
        return validate;
    }

    /**
     * Specify a flag to validate or not the XML content.
     *
     * @param validate the validate to set
     * @see #parse(Reader, Sink)
     * @since 1.1
     */
    public void setValidate( boolean validate )
    {
        this.validate = validate;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}
     * and define it in the given parser.
     * <br/>
     * Entities whose name ends with one of the default XML entity names (<code>amp</code>, <code>lt</code>,
     * <code>gt</code>, <code>quot</code> and <code>apos</code>) are skipped.
     *
     * @param parser not null
     * @param entityName not null
     * @param entityValue not null
     * @throws XmlPullParserException if any
     * @see XmlPullParser#defineEntityReplacementText(String, String)
     */
    private void addEntity( XmlPullParser parser, String entityName, String entityValue )
        throws XmlPullParserException
    {
        if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
            || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
        {
            return;
        }

        parser.defineEntityReplacementText( entityName, entityValue );
        getLocalEntities().put( entityName, entityValue );
    }

    /**
     * Handle entities defined in a local doctype as the following:
     * <pre>
     * &lt;!DOCTYPE foo [
     *   &lt;!ENTITY bar "&amp;#x160;"&gt;
     *   &lt;!ENTITY bar1 "&amp;#x161;"&gt;
     * ]&gt;
     * </pre>
     *
     * @param parser not null
     * @param text not null
     * @throws XmlPullParserException if any
     */
    private void addLocalEntities( XmlPullParser parser, String text )
        throws XmlPullParserException
    {
        if ( StringUtils.countMatches( text, ENTITY_START ) <= 0 )
        {
            return;
        }

        // text should be foo [...]
        int start = text.indexOf( '[' );
        int end = text.lastIndexOf( ']' );
        if ( start != -1 && end != -1 )
        {
            addDTDEntities( parser, text.substring( start + 1, end ) );
        }
    }

    /**
     * Handle entities defined in external doctypes as the following:
     * <pre>
     * &lt;!DOCTYPE foo [
     *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
     *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
     *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
     *   %HTMLlat1;
     * ]&gt;
     * </pre>
     *
     * <p>The text is read line by line; lines are accumulated until the accumulated text contains a declaration
     * matching one of the two entity patterns, the entity is then registered and the accumulation restarts.</p>
     *
     * @param parser not null
     * @param text not null
     * @throws XmlPullParserException if any
     */
    private void addDTDEntities( XmlPullParser parser, String text )
        throws XmlPullParserException
    {
        if ( StringUtils.countMatches( text, ENTITY_START ) <= 0 )
        {
            return;
        }

        // make sure that every declaration starts on its own line
        final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
        BufferedReader reader = new BufferedReader( new StringReader( txt ) );
        StringBuilder pending = new StringBuilder();
        try
        {
            String line;
            while ( ( line = reader.readLine() ) != null )
            {
                pending.append( '\n' ).append( line );

                Matcher matcher = findEntityDeclaration( pending );
                if ( matcher != null )
                {
                    addEntity( parser, matcher.group( GROUP_ENTITY_NAME ), matcher.group( GROUP_ENTITY_VALUE ) );
                    pending.setLength( 0 );
                }
            }
        }
        catch ( IOException e )
        {
            // nop: the reader is backed by an in-memory string
        }
        finally
        {
            IOUtil.close( reader );
        }
    }

    /**
     * Looks for an entity declaration in the given text, trying the named-entity pattern first and the
     * numeric-reference pattern second.
     *
     * @param text the accumulated declaration text, not null
     * @return a matcher positioned on the first declaration found, or <code>null</code> if there is none.
     */
    private static Matcher findEntityDeclaration( CharSequence text )
    {
        Matcher matcher = PATTERN_ENTITY_1.matcher( text );
        if ( matcher.find() )
        {
            return matcher;
        }

        matcher = PATTERN_ENTITY_2.matcher( text );
        if ( matcher.find() )
        {
            return matcher;
        }

        return null;
    }

    /**
     * Implementation of the callback mechanism <code>EntityResolver</code>.
     * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
     */
    public static class CachedFileEntityResolver
        implements EntityResolver
    {
        /** Map with systemId as key and the content of systemId as byte[]. */
        protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();

        /**
         * {@inheritDoc}
         *
         * <p>The content is looked up in the in-memory cache first, then in a file of the temp directory named
         * after the last segment of the systemId, and is finally loaded from the classpath (for
         * <code>file</code> systemIds) or from the systemId URL. Freshly loaded content is written to the temp
         * file and stored in the in-memory cache.</p>
         */
        public InputSource resolveEntity( String publicId, String systemId )
            throws SAXException, IOException
        {
            byte[] res = ENTITY_CACHE.get( systemId );
            // already cached?
            if ( res == null )
            {
                String systemName = FileUtils.getFile( systemId ).getName();
                File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
                // maybe already as a temp file?
                if ( temp.exists() )
                {
                    // TODO How to refresh Doxia XSDs from temp dir?
                    res = toByteArray( temp.toURI().toURL() );
                }
                else
                {
                    // is systemId a file or an url?
                    if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
                    {
                        // Doxia XSDs are included in the jars, so try to find the resource systemName from
                        // the classpath...
                        String resource = "/" + systemName;
                        URL url = getClass().getResource( resource );
                        if ( url == null )
                        {
                            throw new SAXException( "Could not find the SYSTEM entity: " + systemId
                                + " because '" + resource + "' is not available of the classpath." );
                        }
                        res = toByteArray( url );
                    }
                    else
                    {
                        res = toByteArray( new URL( systemId ) );
                    }

                    // write systemId as temp file
                    copy( res, temp );
                }

                ENTITY_CACHE.put( systemId, res );
            }

            InputSource is = new InputSource( new ByteArrayInputStream( res ) );
            is.setPublicId( publicId );
            is.setSystemId( systemId );

            return is;
        }

        /**
         * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
         * content.
         * Otherwise, use HttpClient to get the http content.
         * Wrap all internal exceptions to throw SAXException.
         *
         * @param url not null
         * @return return an array of byte
         * @throws SAXException if any
         */
        private static byte[] toByteArray( URL url )
            throws SAXException
        {
            String protocol = url.getProtocol();
            if ( !( protocol.equalsIgnoreCase( "http" ) || protocol.equalsIgnoreCase( "https" ) ) )
            {
                InputStream is = null;
                try
                {
                    is = url.openStream();
                    if ( is == null )
                    {
                        throw new SAXException( "Cannot open stream from the url: " + url.toString() );
                    }
                    return IOUtil.toByteArray( is );
                }
                catch ( IOException e )
                {
                    throw new SAXException( "IOException: " + e.getMessage(), e );
                }
                finally
                {
                    IOUtil.close( is );
                }
            }

            // it is an HTTP url, using HttpClient...
            DefaultHttpClient client = new DefaultHttpClient();
            HttpGet method = new HttpGet( url.toString() );
            // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
            // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
            method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );

            HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
            client.setHttpRequestRetryHandler( retryHandler );

            HttpEntity entity = null;
            try
            {
                HttpResponse response = client.execute( method );
                int statusCode = response.getStatusLine().getStatusCode();
                if ( statusCode != HttpStatus.SC_OK )
                {
                    throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
                        + statusCode + ", which is not allowed. The server gave this reason for the failure '"
                        + response.getStatusLine().getReasonPhrase() + "'." );
                }

                entity = response.getEntity();
                return EntityUtils.toByteArray( entity );
            }
            catch ( ClientProtocolException e )
            {
                throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
            }
            catch ( IOException e )
            {
                throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
            }
            finally
            {
                if ( entity != null )
                {
                    try
                    {
                        entity.consumeContent();
                    }
                    catch ( IOException e )
                    {
                        // Ignore: best-effort release of the response, the content has been read or has failed
                    }
                }

                // The client is created per call: shut its connection manager down so that the
                // underlying connections are released instead of leaking.
                client.getConnectionManager().shutdown();
            }
        }

        /**
         * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
         *
         * @param res not null array of byte
         * @param f the file where to write the bytes
         * @throws SAXException if <code>f</code> is a directory or if the bytes can not be written
         * @see IOUtil#copy(byte[], OutputStream)
         */
        private void copy( byte[] res, File f )
            throws SAXException
        {
            if ( f.isDirectory() )
            {
                throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
            }

            OutputStream os = null;
            try
            {
                os = new FileOutputStream( f );
                IOUtil.copy( res, os );
            }
            catch ( IOException e )
            {
                throw new SAXException( "IOException: " + e.getMessage(), e );
            }
            finally
            {
                IOUtil.close( os );
            }
        }
    }
}