001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.maven.doxia.parser; 020 021import java.io.BufferedReader; 022import java.io.ByteArrayInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.Reader; 026import java.io.StringReader; 027import java.net.URI; 028import java.net.URL; 029import java.nio.file.Paths; 030import java.util.HashMap; 031import java.util.Hashtable; 032import java.util.LinkedHashMap; 033import java.util.Map; 034import java.util.regex.Matcher; 035import java.util.regex.Pattern; 036 037import org.apache.commons.io.IOUtils; 038import org.apache.commons.lang3.StringUtils; 039import org.apache.maven.doxia.macro.MacroExecutionException; 040import org.apache.maven.doxia.markup.XmlMarkup; 041import org.apache.maven.doxia.sink.Sink; 042import org.apache.maven.doxia.sink.impl.AbstractLocator; 043import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 044import org.apache.maven.doxia.util.HtmlTools; 045import org.apache.maven.doxia.util.XmlValidator; 046import org.codehaus.plexus.util.xml.pull.EntityReplacementMap; 047import org.codehaus.plexus.util.xml.pull.MXParser; 048import org.codehaus.plexus.util.xml.pull.XmlPullParser; 049import org.codehaus.plexus.util.xml.pull.XmlPullParserException; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052import org.xml.sax.EntityResolver; 053import org.xml.sax.InputSource; 054import org.xml.sax.SAXException; 055 056/** 057 * An abstract class that defines some convenience methods for <code>XML</code> parsers. 058 * 059 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a> 060 * @since 1.0 061 */ 062public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup { 063 /** 064 * Entity pattern for HTML entity, i.e. &nbsp; 065 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*> 066 * <br> 067 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>. 068 */ 069 private static final Pattern PATTERN_ENTITY_1 = 070 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>"); 071 072 /** 073 * Entity pattern for Unicode entity, i.e. &#38; 074 * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" 075 * <br> 076 * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>. 077 */ 078 private static final Pattern PATTERN_ENTITY_2 = 079 Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"); 080 081 private boolean ignorableWhitespace; 082 083 private boolean collapsibleWhitespace; 084 085 private boolean trimmableWhitespace; 086 087 private Map<String, String> entities; 088 089 private boolean validate = false; 090 091 /** 092 * If set the parser will be loaded with all single characters 093 * from the XHTML specification. 094 * The entities used: 095 * <ul> 096 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent</li> 097 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent</li> 098 * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent</li> 099 * </ul> 100 */ 101 private boolean addDefaultEntities = true; 102 103 /** {@inheritDoc} */ 104 public void parse(Reader source, Sink sink, String reference) throws ParseException { 105 init(); 106 107 Reader src = source; 108 109 // 1 first parsing if validation is required 110 if (isValidate()) { 111 String content; 112 try { 113 content = IOUtils.toString(new BufferedReader(src)); 114 } catch (IOException e) { 115 throw new ParseException("Error reading the model", e); 116 } 117 118 XmlValidator validator = new XmlValidator(); 119 validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler()); 120 validator.setEntityResolver(new CachedFileEntityResolver()); 121 122 validator.validate(content); 123 124 src = new StringReader(content); 125 } 126 127 // 2 second parsing to process 128 try { 129 XmlPullParser parser = addDefaultEntities 130 ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap) 131 : new MXParser(); 132 133 parser.setInput(src); 134 135 // allow parser initialization, e.g. for additional entities in XHTML 136 // Note: do it after input is set, otherwise values are reset 137 initXmlParser(parser); 138 139 parseXml(parser, getWrappedSink(sink), reference); 140 } catch (XmlPullParserException ex) { 141 throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber()); 142 } catch (MacroExecutionException ex) { 143 throw new ParseException("Macro execution failed", ex); 144 } 145 146 setSecondParsing(false); 147 init(); 148 } 149 150 /** 151 * Initializes the parser with custom entities or other options. 152 * 153 * @param parser A parser, not null. 154 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser 155 */ 156 protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException { 157 // nop 158 } 159 160 /** {@inheritDoc} */ 161 @Override 162 public final int getType() { 163 return XML_TYPE; 164 } 165 166 /** 167 * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet. 168 * 169 * @param parser A parser, not null. 170 * @return a SinkEventAttributeSet or null if the current parser event is not a start tag. 171 * @since 1.1 172 */ 173 protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) { 174 int count = parser.getAttributeCount(); 175 176 if (count < 0) { 177 return null; 178 } 179 180 SinkEventAttributeSet atts = new SinkEventAttributeSet(count); 181 182 for (int i = 0; i < count; i++) { 183 atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i)); 184 } 185 186 return atts; 187 } 188 189 private static final class XmlPullParserLocator extends AbstractLocator { 190 191 private final XmlPullParser parser; 192 193 XmlPullParserLocator(XmlPullParser parser, String reference) { 194 super(reference); 195 this.parser = parser; 196 } 197 198 @Override 199 public int getLineNumber() { 200 return parser.getLineNumber(); 201 } 202 203 @Override 204 public int getColumnNumber() { 205 return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1; 206 } 207 } 208 /** 209 * Parse the model from the XmlPullParser into the given sink. 210 * 211 * @param parser A parser, not null. 212 * @param sink the sink to receive the events. 213 * @param reference the reference (usually the file path of the parsed document) 214 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 215 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro 216 */ 217 private void parseXml(XmlPullParser parser, Sink sink, String reference) 218 throws XmlPullParserException, MacroExecutionException { 219 sink.setDocumentLocator(new XmlPullParserLocator(parser, reference)); 220 int eventType = parser.getEventType(); 221 222 while (eventType != XmlPullParser.END_DOCUMENT) { 223 if (eventType == XmlPullParser.START_TAG) { 224 handleStartTag(parser, sink); 225 } else if (eventType == XmlPullParser.END_TAG) { 226 handleEndTag(parser, sink); 227 } else if (eventType == XmlPullParser.TEXT) { 228 String text = getText(parser); 229 230 if (isIgnorableWhitespace()) { 231 if (text.trim().length() != 0) { 232 handleText(parser, sink); 233 } 234 } else { 235 handleText(parser, sink); 236 } 237 } else if (eventType == XmlPullParser.CDSECT) { 238 handleCdsect(parser, sink); 239 } else if (eventType == XmlPullParser.COMMENT) { 240 handleComment(parser, sink); 241 } else if (eventType == XmlPullParser.ENTITY_REF) { 242 handleEntity(parser, sink); 243 } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) { 244 // nop 245 } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) { 246 // nop 247 } else if (eventType == XmlPullParser.DOCDECL) { 248 addLocalEntities(parser, parser.getText()); 249 250 for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) { 251 addDTDEntities(parser, new String(res)); 252 } 253 } 254 255 try { 256 eventType = parser.nextToken(); 257 } catch (IOException io) { 258 // Does not have a cause arg 259 throw new XmlPullParserException("Failed to parse next token", parser, io); 260 } 261 } 262 } 263 264 /** 265 * Goes through the possible start tags. 266 * 267 * @param parser A parser, not null. 268 * @param sink the sink to receive the events. 269 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 270 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro 271 */ 272 protected abstract void handleStartTag(XmlPullParser parser, Sink sink) 273 throws XmlPullParserException, MacroExecutionException; 274 275 /** 276 * Goes through the possible end tags. 277 * 278 * @param parser A parser, not null. 279 * @param sink the sink to receive the events. 280 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 281 * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro 282 */ 283 protected abstract void handleEndTag(XmlPullParser parser, Sink sink) 284 throws XmlPullParserException, MacroExecutionException; 285 286 /** 287 * Handles text events. 288 * 289 * <p>This is a default implementation, if the parser points to a non-empty text element, 290 * it is emitted as a text event into the specified sink.</p> 291 * 292 * @param parser A parser, not null. 293 * @param sink the sink to receive the events. Not null. 294 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 295 */ 296 protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException { 297 String text = getText(parser); 298 299 /* 300 * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the 301 * parser so any whitespace that makes it here is significant. 302 */ 303 if (text != null && !text.isEmpty()) { 304 sink.text(text); 305 } 306 } 307 308 /** 309 * Handles CDATA sections. 310 * 311 * <p>This is a default implementation, all data are emitted as text 312 * events into the specified sink.</p> 313 * 314 * @param parser A parser, not null. 315 * @param sink the sink to receive the events. Not null. 316 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 317 */ 318 protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException { 319 sink.text(getText(parser)); 320 } 321 322 /** 323 * Handles comments. 324 * 325 * <p>This is a default implementation, all data are emitted as comment 326 * events into the specified sink.</p> 327 * 328 * @param parser A parser, not null. 329 * @param sink the sink to receive the events. Not null. 330 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 331 */ 332 protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException { 333 if (isEmitComments()) { 334 sink.comment(getText(parser)); 335 } 336 } 337 338 /** 339 * Handles entities. 340 * 341 * <p>This is a default implementation, all entities are resolved and emitted as text 342 * events into the specified sink, except:</p> 343 * <ul> 344 * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code> 345 * are emitted as <code>nonBreakingSpace()</code> events.</li> 346 * </ul> 347 * 348 * @param parser A parser, not null. 349 * @param sink the sink to receive the events. Not null. 350 * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model 351 */ 352 protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException { 353 String text = getText(parser); 354 355 String name = parser.getName(); 356 357 if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) { 358 sink.nonBreakingSpace(); 359 } else { 360 String unescaped = HtmlTools.unescapeHTML(text); 361 362 sink.text(unescaped); 363 } 364 } 365 366 /** 367 * Handles an unknown event. 368 * 369 * <p>This is a default implementation, all events are emitted as unknown 370 * events into the specified sink.</p> 371 * 372 * @param parser the parser to get the event from. 373 * @param sink the sink to receive the event. 374 * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE, 375 * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE. 376 * It will be passed as the first argument of the required parameters to the Sink 377 * {@link 378 * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)} 379 * method. 380 */ 381 protected void handleUnknown(XmlPullParser parser, Sink sink, int type) { 382 SinkEventAttributeSet attribs = getAttributesFromParser(parser); 383 384 handleUnknown(parser.getName(), attribs, sink, type); 385 } 386 387 protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) { 388 Object[] required = new Object[] {type}; 389 sink.unknown(elementName, required, attribs); 390 } 391 392 /** 393 * <p>isIgnorableWhitespace.</p> 394 * 395 * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise. 396 * @see #setIgnorableWhitespace(boolean) 397 * @since 1.1 398 */ 399 protected boolean isIgnorableWhitespace() { 400 return ignorableWhitespace; 401 } 402 403 /** 404 * Specify that whitespace will be ignored. I.e.: 405 * <pre><tr> <td/> </tr></pre> 406 * is equivalent to 407 * <pre><tr><td/></tr></pre> 408 * 409 * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise. 410 * @since 1.1 411 */ 412 protected void setIgnorableWhitespace(boolean ignorable) { 413 this.ignorableWhitespace = ignorable; 414 } 415 416 /** 417 * <p>isCollapsibleWhitespace.</p> 418 * 419 * @return <code>true</code> if text will collapse, <code>false</code> otherwise. 420 * @see #setCollapsibleWhitespace(boolean) 421 * @since 1.1 422 */ 423 protected boolean isCollapsibleWhitespace() { 424 return collapsibleWhitespace; 425 } 426 427 /** 428 * Specify that text will be collapsed. I.e.: 429 * <pre>Text Text</pre> 430 * is equivalent to 431 * <pre>Text Text</pre> 432 * 433 * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise. 434 * @since 1.1 435 */ 436 protected void setCollapsibleWhitespace(boolean collapsible) { 437 this.collapsibleWhitespace = collapsible; 438 } 439 440 /** 441 * <p>isTrimmableWhitespace.</p> 442 * 443 * @return <code>true</code> if text will be trim, <code>false</code> otherwise. 444 * @see #setTrimmableWhitespace(boolean) 445 * @since 1.1 446 */ 447 protected boolean isTrimmableWhitespace() { 448 return trimmableWhitespace; 449 } 450 451 /** 452 * Specify that text will be collapsed. I.e.: 453 * <pre><p> Text </p></pre> 454 * is equivalent to 455 * <pre><p>Text</p></pre> 456 * 457 * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise. 458 * @since 1.1 459 */ 460 protected void setTrimmableWhitespace(boolean trimmable) { 461 this.trimmableWhitespace = trimmable; 462 } 463 464 /** 465 * <p>getText.</p> 466 * 467 * @param parser A parser, not null. 468 * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration. 469 * @see XmlPullParser#getText() 470 * @see #isCollapsibleWhitespace() 471 * @see #isTrimmableWhitespace() 472 * @since 1.1 473 */ 474 protected String getText(XmlPullParser parser) { 475 String text = parser.getText(); 476 477 if (isTrimmableWhitespace()) { 478 text = text.trim(); 479 } 480 481 if (isCollapsibleWhitespace()) { 482 StringBuilder newText = new StringBuilder(); 483 String[] elts = StringUtils.split(text, " \r\n"); 484 for (int i = 0; i < elts.length; i++) { 485 newText.append(elts[i]); 486 if ((i + 1) < elts.length) { 487 newText.append(" "); 488 } 489 } 490 text = newText.toString(); 491 } 492 493 return text; 494 } 495 496 /** 497 * Return the defined entities in a local doctype. I.e.: 498 * <pre> 499 * <!DOCTYPE foo [ 500 * <!ENTITY bar "&#x160;"> 501 * <!ENTITY bar1 "&#x161;"> 502 * ]> 503 * </pre> 504 * 505 * @return a map of the defined entities in a local doctype. 506 * @since 1.1 507 */ 508 protected Map<String, String> getLocalEntities() { 509 if (entities == null) { 510 entities = new LinkedHashMap<>(); 511 } 512 513 return entities; 514 } 515 516 /** 517 * <p>isValidate.</p> 518 * 519 * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise. 520 * @since 1.1 521 */ 522 public boolean isValidate() { 523 return validate; 524 } 525 526 /** 527 * Specify a flag to validate or not the XML content. 528 * 529 * @param validate the validate to set 530 * @see #parse(Reader, Sink) 531 * @since 1.1 532 */ 533 public void setValidate(boolean validate) { 534 this.validate = validate; 535 } 536 537 /** 538 * @since 2.0.0-M4 539 */ 540 public boolean getAddDefaultEntities() { 541 return addDefaultEntities; 542 } 543 544 /** 545 * @since 2.0.0-M4 546 */ 547 public void setAddDefaultEntities(boolean addDefaultEntities) { 548 this.addDefaultEntities = addDefaultEntities; 549 } 550 551 // ---------------------------------------------------------------------- 552 // Private methods 553 // ---------------------------------------------------------------------- 554 555 /** 556 * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}. 557 * <br> 558 * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;. 559 * 560 * @param parser not null 561 * @param entityName not null 562 * @param entityValue not null 563 * @throws XmlPullParserException if any 564 * @see XmlPullParser#defineEntityReplacementText(String, String) 565 */ 566 private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException { 567 if (entityName.endsWith("amp") 568 || entityName.endsWith("lt") 569 || entityName.endsWith("gt") 570 || entityName.endsWith("quot") 571 || entityName.endsWith("apos")) { 572 return; 573 } 574 575 parser.defineEntityReplacementText(entityName, entityValue); 576 getLocalEntities().put(entityName, entityValue); 577 } 578 579 /** 580 * Handle entities defined in a local doctype as the following: 581 * <pre> 582 * <!DOCTYPE foo [ 583 * <!ENTITY bar "&#x160;"> 584 * <!ENTITY bar1 "&#x161;"> 585 * ]> 586 * </pre> 587 * 588 * @param parser not null 589 * @param text not null 590 * @throws XmlPullParserException if any 591 */ 592 private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException { 593 int entitiesCount = StringUtils.countMatches(text, ENTITY_START); 594 if (entitiesCount > 0) { 595 // text should be foo [...] 596 int start = text.indexOf('['); 597 int end = text.lastIndexOf(']'); 598 if (start != -1 && end != -1) { 599 addDTDEntities(parser, text.substring(start + 1, end)); 600 } 601 } 602 } 603 604 /** 605 * Handle entities defined in external doctypes as the following: 606 * <pre> 607 * <!DOCTYPE foo [ 608 * <!-- These are the entity sets for ISO Latin 1 characters for the XHTML --> 609 * <!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" 610 * "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"> 611 * %HTMLlat1; 612 * ]> 613 * </pre> 614 * 615 * @param parser not null 616 * @param text not null 617 * @throws XmlPullParserException if any 618 */ 619 private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException { 620 int entitiesCount = StringUtils.countMatches(text, ENTITY_START); 621 if (entitiesCount > 0) { 622 final String txt = StringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START); 623 try (BufferedReader reader = new BufferedReader(new StringReader(txt))) { 624 String line; 625 String tmpLine = ""; 626 Matcher matcher; 627 while ((line = reader.readLine()) != null) { 628 tmpLine += "\n" + line; 629 matcher = PATTERN_ENTITY_1.matcher(tmpLine); 630 if (matcher.find() && matcher.groupCount() == 7) { 631 String entityName = matcher.group(2); 632 String entityValue = matcher.group(5); 633 634 addEntity(parser, entityName, entityValue); 635 tmpLine = ""; 636 } else { 637 matcher = PATTERN_ENTITY_2.matcher(tmpLine); 638 if (matcher.find() && matcher.groupCount() == 8) { 639 String entityName = matcher.group(2); 640 String entityValue = matcher.group(5); 641 642 addEntity(parser, entityName, entityValue); 643 tmpLine = ""; 644 } 645 } 646 } 647 } catch (IOException e) { 648 // nop 649 } 650 } 651 } 652 653 /** 654 * Implementation of the callback mechanism <code>EntityResolver</code>. 655 * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>. 656 */ 657 public static class CachedFileEntityResolver implements EntityResolver { 658 private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class); 659 660 /** Map with systemId as key and the content of systemId as byte[]. */ 661 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>(); 662 663 private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>(); 664 665 static { 666 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd"); 667 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd"); 668 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd"); 669 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd"); 670 WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd"); 671 WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd"); 672 WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent"); 673 WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent"); 674 } 675 676 /** {@inheritDoc} */ 677 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { 678 byte[] res = ENTITY_CACHE.get(systemId); 679 // already cached? 680 if (res == null) { 681 if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) { 682 String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId); 683 URL url = getClass().getResource(resource); 684 if (url != null) { 685 LOGGER.debug( 686 "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource); 687 res = toByteArray(url); 688 } 689 } 690 691 if (res == null) { 692 URI uri = URI.create(systemId); 693 if (uri.getScheme() == null) { 694 uri = Paths.get(systemId).toUri(); 695 } 696 697 LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri); 698 res = toByteArray(uri.toURL()); 699 } 700 701 ENTITY_CACHE.put(systemId, res); 702 } else { 703 LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId); 704 } 705 706 InputSource is = new InputSource(new ByteArrayInputStream(res)); 707 is.setPublicId(publicId); 708 is.setSystemId(systemId); 709 710 return is; 711 } 712 713 /** 714 * @param url not null 715 * @return return an array of byte 716 * @throws SAXException if any 717 */ 718 private static byte[] toByteArray(URL url) throws SAXException { 719 try (InputStream is = url.openStream()) { 720 if (is == null) { 721 throw new SAXException("Cannot open stream from the url: " + url); 722 } 723 return IOUtils.toByteArray(is); 724 } catch (IOException e) { 725 throw new SAXException(e); 726 } 727 } 728 } 729}