View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.doxia.parser;
20  
21  import java.io.BufferedReader;
22  import java.io.ByteArrayInputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.Reader;
26  import java.io.StringReader;
27  import java.net.URI;
28  import java.net.URL;
29  import java.nio.file.Paths;
30  import java.util.HashMap;
31  import java.util.Hashtable;
32  import java.util.LinkedHashMap;
33  import java.util.Map;
34  import java.util.regex.Matcher;
35  import java.util.regex.Pattern;
36  
37  import org.apache.commons.io.IOUtils;
38  import org.apache.commons.lang3.StringUtils;
39  import org.apache.maven.doxia.macro.MacroExecutionException;
40  import org.apache.maven.doxia.markup.XmlMarkup;
41  import org.apache.maven.doxia.sink.Sink;
42  import org.apache.maven.doxia.sink.impl.AbstractLocator;
43  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
44  import org.apache.maven.doxia.util.HtmlTools;
45  import org.apache.maven.doxia.util.XmlValidator;
46  import org.codehaus.plexus.util.xml.pull.EntityReplacementMap;
47  import org.codehaus.plexus.util.xml.pull.MXParser;
48  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
49  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
50  import org.slf4j.Logger;
51  import org.slf4j.LoggerFactory;
52  import org.xml.sax.EntityResolver;
53  import org.xml.sax.InputSource;
54  import org.xml.sax.SAXException;
55  
56  /**
57   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
58   *
59   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
60   * @since 1.0
61   */
62  public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup {
    /**
     * Entity pattern for HTML entity, i.e. &#38;nbsp;
     * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>"
     * <br>
     * Capture group 2 is the entity name and capture group 5 is the entity value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_1 =
            Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>");

    /**
     * Entity pattern for Unicode entity, i.e. &#38;#38;
     * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
     * <br>
     * Capture group 2 is the entity name and capture group 5 is the entity value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_2 =
            Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>");
80  
    /** When {@code true}, text events that contain only whitespace are not passed to the text handler. */
    private boolean ignorableWhitespace;

    /** When {@code true}, {@link #getText(XmlPullParser)} collapses runs of spaces and line breaks. */
    private boolean collapsibleWhitespace;

    /** When {@code true}, {@link #getText(XmlPullParser)} trims the text. */
    private boolean trimmableWhitespace;

    /** Entities registered while reading doctype declarations; created lazily by {@link #getLocalEntities()}. */
    private Map<String, String> entities;

    /** Whether the content is validated before it is parsed; off by default. */
    private boolean validate = false;

    /**
     * If set the parser will be loaded with all single characters
     * from the XHTML specification.
     * The entities used:
     * <ul>
     * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent</li>
     * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent</li>
     * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent</li>
     * </ul>
     */
    private boolean addDefaultEntities = true;
102 
103     /** {@inheritDoc} */
104     public void parse(Reader source, Sink sink, String reference) throws ParseException {
105         init();
106 
107         Reader src = source;
108 
109         // 1 first parsing if validation is required
110         if (isValidate()) {
111             String content;
112             try {
113                 content = IOUtils.toString(new BufferedReader(src));
114             } catch (IOException e) {
115                 throw new ParseException("Error reading the model", e);
116             }
117 
118             XmlValidator validator = new XmlValidator();
119             validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
120             validator.setEntityResolver(new CachedFileEntityResolver());
121 
122             validator.validate(content);
123 
124             src = new StringReader(content);
125         }
126 
127         // 2 second parsing to process
128         try {
129             XmlPullParser parser = addDefaultEntities
130                     ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap)
131                     : new MXParser();
132 
133             parser.setInput(src);
134 
135             // allow parser initialization, e.g. for additional entities in XHTML
136             // Note: do it after input is set, otherwise values are reset
137             initXmlParser(parser);
138 
139             parseXml(parser, getWrappedSink(sink), reference);
140         } catch (XmlPullParserException ex) {
141             throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber());
142         } catch (MacroExecutionException ex) {
143             throw new ParseException("Macro execution failed", ex);
144         }
145 
146         setSecondParsing(false);
147         init();
148     }
149 
    /**
     * Initializes the parser with custom entities or other options.
     *
     * <p>This default implementation does nothing. {@code parse} calls it after the input has been set
     * on the parser and before the document is processed.</p>
     *
     * @param parser A parser, not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
     */
    protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
        // nop
    }
159 
    /**
     * {@inheritDoc}
     *
     * @return always {@code XML_TYPE}.
     */
    @Override
    public final int getType() {
        return XML_TYPE;
    }
165 
166     /**
167      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
168      *
169      * @param parser A parser, not null.
170      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
171      * @since 1.1
172      */
173     protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) {
174         int count = parser.getAttributeCount();
175 
176         if (count < 0) {
177             return null;
178         }
179 
180         SinkEventAttributeSet atts = new SinkEventAttributeSet(count);
181 
182         for (int i = 0; i < count; i++) {
183             atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i));
184         }
185 
186         return atts;
187     }
188 
189     private static final class XmlPullParserLocator extends AbstractLocator {
190 
191         private final XmlPullParser parser;
192 
193         XmlPullParserLocator(XmlPullParser parser, String reference) {
194             super(reference);
195             this.parser = parser;
196         }
197 
198         @Override
199         public int getLineNumber() {
200             return parser.getLineNumber();
201         }
202 
203         @Override
204         public int getColumnNumber() {
205             return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1;
206         }
207     }
208     /**
209      * Parse the model from the XmlPullParser into the given sink.
210      *
211      * @param parser A parser, not null.
212      * @param sink the sink to receive the events.
213      * @param reference the reference (usually the file path of the parsed document)
214      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
215      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
216      */
217     private void parseXml(XmlPullParser parser, Sink sink, String reference)
218             throws XmlPullParserException, MacroExecutionException {
219         sink.setDocumentLocator(new XmlPullParserLocator(parser, reference));
220         int eventType = parser.getEventType();
221 
222         while (eventType != XmlPullParser.END_DOCUMENT) {
223             if (eventType == XmlPullParser.START_TAG) {
224                 handleStartTag(parser, sink);
225             } else if (eventType == XmlPullParser.END_TAG) {
226                 handleEndTag(parser, sink);
227             } else if (eventType == XmlPullParser.TEXT) {
228                 String text = getText(parser);
229 
230                 if (isIgnorableWhitespace()) {
231                     if (text.trim().length() != 0) {
232                         handleText(parser, sink);
233                     }
234                 } else {
235                     handleText(parser, sink);
236                 }
237             } else if (eventType == XmlPullParser.CDSECT) {
238                 handleCdsect(parser, sink);
239             } else if (eventType == XmlPullParser.COMMENT) {
240                 handleComment(parser, sink);
241             } else if (eventType == XmlPullParser.ENTITY_REF) {
242                 handleEntity(parser, sink);
243             } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) {
244                 // nop
245             } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
246                 // nop
247             } else if (eventType == XmlPullParser.DOCDECL) {
248                 addLocalEntities(parser, parser.getText());
249 
250                 for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) {
251                     addDTDEntities(parser, new String(res));
252                 }
253             }
254 
255             try {
256                 eventType = parser.nextToken();
257             } catch (IOException io) {
258                 // Does not have a cause arg
259                 throw new XmlPullParserException("Failed to parse next token", parser, io);
260             }
261         }
262     }
263 
    /**
     * Goes through the possible start tags.
     *
     * <p>Called by the parsing loop for every start-tag event.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag(XmlPullParser parser, Sink sink)
            throws XmlPullParserException, MacroExecutionException;
274 
    /**
     * Goes through the possible end tags.
     *
     * <p>Called by the parsing loop for every end-tag event.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag(XmlPullParser parser, Sink sink)
            throws XmlPullParserException, MacroExecutionException;
285 
286     /**
287      * Handles text events.
288      *
289      * <p>This is a default implementation, if the parser points to a non-empty text element,
290      * it is emitted as a text event into the specified sink.</p>
291      *
292      * @param parser A parser, not null.
293      * @param sink the sink to receive the events. Not null.
294      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
295      */
296     protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
297         String text = getText(parser);
298 
299         /*
300          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
301          * parser so any whitespace that makes it here is significant.
302          */
303         if (text != null && !text.isEmpty()) {
304             sink.text(text);
305         }
306     }
307 
308     /**
309      * Handles CDATA sections.
310      *
311      * <p>This is a default implementation, all data are emitted as text
312      * events into the specified sink.</p>
313      *
314      * @param parser A parser, not null.
315      * @param sink the sink to receive the events. Not null.
316      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
317      */
318     protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
319         sink.text(getText(parser));
320     }
321 
322     /**
323      * Handles comments.
324      *
325      * <p>This is a default implementation, all data are emitted as comment
326      * events into the specified sink.</p>
327      *
328      * @param parser A parser, not null.
329      * @param sink the sink to receive the events. Not null.
330      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
331      */
332     protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
333         if (isEmitComments()) {
334             sink.comment(getText(parser));
335         }
336     }
337 
338     /**
339      * Handles entities.
340      *
341      * <p>This is a default implementation, all entities are resolved and emitted as text
342      * events into the specified sink, except:</p>
343      * <ul>
344      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
345      * are emitted as <code>nonBreakingSpace()</code> events.</li>
346      * </ul>
347      *
348      * @param parser A parser, not null.
349      * @param sink the sink to receive the events. Not null.
350      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
351      */
352     protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException {
353         String text = getText(parser);
354 
355         String name = parser.getName();
356 
357         if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) {
358             sink.nonBreakingSpace();
359         } else {
360             String unescaped = HtmlTools.unescapeHTML(text);
361 
362             sink.text(unescaped);
363         }
364     }
365 
366     /**
367      * Handles an unknown event.
368      *
369      * <p>This is a default implementation, all events are emitted as unknown
370      * events into the specified sink.</p>
371      *
372      * @param parser the parser to get the event from.
373      * @param sink the sink to receive the event.
374      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
375      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
376      * It will be passed as the first argument of the required parameters to the Sink
377      * {@link
378      * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
379      * method.
380      */
381     protected void handleUnknown(XmlPullParser parser, Sink sink, int type) {
382         SinkEventAttributeSet attribs = getAttributesFromParser(parser);
383 
384         handleUnknown(parser.getName(), attribs, sink, type);
385     }
386 
387     protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) {
388         Object[] required = new Object[] {type};
389         sink.unknown(elementName, required, attribs);
390     }
391 
392     /**
393      * <p>isIgnorableWhitespace.</p>
394      *
395      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
396      * @see #setIgnorableWhitespace(boolean)
397      * @since 1.1
398      */
399     protected boolean isIgnorableWhitespace() {
400         return ignorableWhitespace;
401     }
402 
403     /**
404      * Specify that whitespace will be ignored. I.e.:
405      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
406      * is equivalent to
407      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
408      *
409      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
410      * @since 1.1
411      */
412     protected void setIgnorableWhitespace(boolean ignorable) {
413         this.ignorableWhitespace = ignorable;
414     }
415 
416     /**
417      * <p>isCollapsibleWhitespace.</p>
418      *
419      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
420      * @see #setCollapsibleWhitespace(boolean)
421      * @since 1.1
422      */
423     protected boolean isCollapsibleWhitespace() {
424         return collapsibleWhitespace;
425     }
426 
427     /**
428      * Specify that text will be collapsed. I.e.:
429      * <pre>Text   Text</pre>
430      * is equivalent to
431      * <pre>Text Text</pre>
432      *
433      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
434      * @since 1.1
435      */
436     protected void setCollapsibleWhitespace(boolean collapsible) {
437         this.collapsibleWhitespace = collapsible;
438     }
439 
440     /**
441      * <p>isTrimmableWhitespace.</p>
442      *
443      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
444      * @see #setTrimmableWhitespace(boolean)
445      * @since 1.1
446      */
447     protected boolean isTrimmableWhitespace() {
448         return trimmableWhitespace;
449     }
450 
451     /**
452      * Specify that text will be collapsed. I.e.:
453      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
454      * is equivalent to
455      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
456      *
457      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
458      * @since 1.1
459      */
460     protected void setTrimmableWhitespace(boolean trimmable) {
461         this.trimmableWhitespace = trimmable;
462     }
463 
464     /**
465      * <p>getText.</p>
466      *
467      * @param parser A parser, not null.
468      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
469      * @see XmlPullParser#getText()
470      * @see #isCollapsibleWhitespace()
471      * @see #isTrimmableWhitespace()
472      * @since 1.1
473      */
474     protected String getText(XmlPullParser parser) {
475         String text = parser.getText();
476 
477         if (isTrimmableWhitespace()) {
478             text = text.trim();
479         }
480 
481         if (isCollapsibleWhitespace()) {
482             StringBuilder newText = new StringBuilder();
483             String[] elts = StringUtils.split(text, " \r\n");
484             for (int i = 0; i < elts.length; i++) {
485                 newText.append(elts[i]);
486                 if ((i + 1) < elts.length) {
487                     newText.append(" ");
488                 }
489             }
490             text = newText.toString();
491         }
492 
493         return text;
494     }
495 
496     /**
497      * Return the defined entities in a local doctype. I.e.:
498      * <pre>
499      * &lt;!DOCTYPE foo [
500      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
501      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
502      * ]&gt;
503      * </pre>
504      *
505      * @return a map of the defined entities in a local doctype.
506      * @since 1.1
507      */
508     protected Map<String, String> getLocalEntities() {
509         if (entities == null) {
510             entities = new LinkedHashMap<>();
511         }
512 
513         return entities;
514     }
515 
516     /**
517      * <p>isValidate.</p>
518      *
519      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
520      * @since 1.1
521      */
522     public boolean isValidate() {
523         return validate;
524     }
525 
526     /**
527      * Specify a flag to validate or not the XML content.
528      *
529      * @param validate the validate to set
530      * @see #parse(Reader, Sink)
531      * @since 1.1
532      */
533     public void setValidate(boolean validate) {
534         this.validate = validate;
535     }
536 
537     /**
538      * @since 2.0.0-M4
539      */
540     public boolean getAddDefaultEntities() {
541         return addDefaultEntities;
542     }
543 
544     /**
545      * @since 2.0.0-M4
546      */
547     public void setAddDefaultEntities(boolean addDefaultEntities) {
548         this.addDefaultEntities = addDefaultEntities;
549     }
550 
551     // ----------------------------------------------------------------------
552     // Private methods
553     // ----------------------------------------------------------------------
554 
555     /**
556      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
557      * <br>
558      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
559      *
560      * @param parser not null
561      * @param entityName not null
562      * @param entityValue not null
563      * @throws XmlPullParserException if any
564      * @see XmlPullParser#defineEntityReplacementText(String, String)
565      */
566     private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException {
567         if (entityName.endsWith("amp")
568                 || entityName.endsWith("lt")
569                 || entityName.endsWith("gt")
570                 || entityName.endsWith("quot")
571                 || entityName.endsWith("apos")) {
572             return;
573         }
574 
575         parser.defineEntityReplacementText(entityName, entityValue);
576         getLocalEntities().put(entityName, entityValue);
577     }
578 
579     /**
580      * Handle entities defined in a local doctype as the following:
581      * <pre>
582      * &lt;!DOCTYPE foo [
583      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
584      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
585      * ]&gt;
586      * </pre>
587      *
588      * @param parser not null
589      * @param text not null
590      * @throws XmlPullParserException if any
591      */
592     private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException {
593         int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
594         if (entitiesCount > 0) {
595             // text should be foo [...]
596             int start = text.indexOf('[');
597             int end = text.lastIndexOf(']');
598             if (start != -1 && end != -1) {
599                 addDTDEntities(parser, text.substring(start + 1, end));
600             }
601         }
602     }
603 
604     /**
605      * Handle entities defined in external doctypes as the following:
606      * <pre>
607      * &lt;!DOCTYPE foo [
608      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
609      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
610      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
611      *   %HTMLlat1;
612      * ]&gt;
613      * </pre>
614      *
615      * @param parser not null
616      * @param text not null
617      * @throws XmlPullParserException if any
618      */
619     private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException {
620         int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
621         if (entitiesCount > 0) {
622             final String txt = StringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START);
623             try (BufferedReader reader = new BufferedReader(new StringReader(txt))) {
624                 String line;
625                 String tmpLine = "";
626                 Matcher matcher;
627                 while ((line = reader.readLine()) != null) {
628                     tmpLine += "\n" + line;
629                     matcher = PATTERN_ENTITY_1.matcher(tmpLine);
630                     if (matcher.find() && matcher.groupCount() == 7) {
631                         String entityName = matcher.group(2);
632                         String entityValue = matcher.group(5);
633 
634                         addEntity(parser, entityName, entityValue);
635                         tmpLine = "";
636                     } else {
637                         matcher = PATTERN_ENTITY_2.matcher(tmpLine);
638                         if (matcher.find() && matcher.groupCount() == 8) {
639                             String entityName = matcher.group(2);
640                             String entityValue = matcher.group(5);
641 
642                             addEntity(parser, entityName, entityValue);
643                             tmpLine = "";
644                         }
645                     }
646                 }
647             } catch (IOException e) {
648                 // nop
649             }
650         }
651     }
652 
653     /**
654      * Implementation of the callback mechanism <code>EntityResolver</code>.
655      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
656      */
657     public static class CachedFileEntityResolver implements EntityResolver {
658         private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class);
659 
660         /** Map with systemId as key and the content of systemId as byte[]. */
661         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
662 
663         private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>();
664 
665         static {
666             WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd");
667             WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd");
668             WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
669             WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
670             WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
671             WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
672             WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
673             WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
674         }
675 
676         /** {@inheritDoc} */
677         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
678             byte[] res = ENTITY_CACHE.get(systemId);
679             // already cached?
680             if (res == null) {
681                 if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) {
682                     String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId);
683                     URL url = getClass().getResource(resource);
684                     if (url != null) {
685                         LOGGER.debug(
686                                 "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource);
687                         res = toByteArray(url);
688                     }
689                 }
690 
691                 if (res == null) {
692                     URI uri = URI.create(systemId);
693                     if (uri.getScheme() == null) {
694                         uri = Paths.get(systemId).toUri();
695                     }
696 
697                     LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri);
698                     res = toByteArray(uri.toURL());
699                 }
700 
701                 ENTITY_CACHE.put(systemId, res);
702             } else {
703                 LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId);
704             }
705 
706             InputSource is = new InputSource(new ByteArrayInputStream(res));
707             is.setPublicId(publicId);
708             is.setSystemId(systemId);
709 
710             return is;
711         }
712 
713         /**
714          * @param url not null
715          * @return return an array of byte
716          * @throws SAXException if any
717          */
718         private static byte[] toByteArray(URL url) throws SAXException {
719             try (InputStream is = url.openStream()) {
720                 if (is == null) {
721                     throw new SAXException("Cannot open stream from the url: " + url);
722                 }
723                 return IOUtils.toByteArray(is);
724             } catch (IOException e) {
725                 throw new SAXException(e);
726             }
727         }
728     }
729 }