001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.doxia.parser;
020
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.AbstractLocator;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;
import org.codehaus.plexus.util.xml.pull.EntityReplacementMap;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
055
056/**
057 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
058 *
059 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
060 * @since 1.0
061 */
062public abstract class AbstractXmlParser extends AbstractParser implements XmlMarkup {
    /**
     * Matches an entity declaration whose quoted value is a single named (HTML) entity reference,
     * e.g. <code>&lt;!ENTITY foo "&#38;nbsp;"&gt;</code>: the declaration start, whitespace, the entity name,
     * whitespace, then the reference (2 to 6 ASCII letters between <code>&#38;</code> and <code>;</code>)
     * in double quotes, optionally surrounded by whitespace.
     * <br>
     * The code using this pattern reads group 2 as the entity name and group 5 as the entity value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_1 =
            Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>");

    /**
     * Matches an entity declaration whose quoted value is a numeric (Unicode) character reference,
     * e.g. <code>&lt;!ENTITY foo "&#38;#x160;"&gt;</code> or the doubly escaped form
     * <code>&lt;!ENTITY foo "&#38;#38;#60;"&gt;</code>: an ampersand followed by any number of
     * <code>#</code> / <code>#x</code> sequences of 1 to 5 hexadecimal digits, each terminated by <code>;</code>.
     * <br>
     * The code using this pattern reads group 2 as the entity name and group 5 as the entity value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_2 =
            Pattern.compile(ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>");

    /** Whether text events made only of whitespace are skipped, see {@link #setIgnorableWhitespace(boolean)}. */
    private boolean ignorableWhitespace;

    /** Whether whitespace runs inside text are collapsed, see {@link #setCollapsibleWhitespace(boolean)}. */
    private boolean collapsibleWhitespace;

    /** Whether text is trimmed, see {@link #setTrimmableWhitespace(boolean)}. */
    private boolean trimmableWhitespace;

    /** Entities registered while parsing, keyed by entity name; created lazily by {@link #getLocalEntities()}. */
    private Map<String, String> entities;

    /** Whether the content is validated before being parsed, see {@link #setValidate(boolean)}. */
    private boolean validate = false;

    /**
     * If set the parser will be loaded with all single characters
     * from the XHTML specification.
     * The entities used:
     * <ul>
     * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent</li>
     * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent</li>
     * <li>http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent</li>
     * </ul>
     */
    private boolean addDefaultEntities = true;
102
103    /** {@inheritDoc} */
104    public void parse(Reader source, Sink sink, String reference) throws ParseException {
105        init();
106
107        Reader src = source;
108
109        // 1 first parsing if validation is required
110        if (isValidate()) {
111            String content;
112            try {
113                content = IOUtils.toString(new BufferedReader(src));
114            } catch (IOException e) {
115                throw new ParseException("Error reading the model", e);
116            }
117
118            XmlValidator validator = new XmlValidator();
119            validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
120            validator.setEntityResolver(new CachedFileEntityResolver());
121
122            validator.validate(content);
123
124            src = new StringReader(content);
125        }
126
127        // 2 second parsing to process
128        try {
129            XmlPullParser parser = addDefaultEntities
130                    ? new MXParser(EntityReplacementMap.defaultEntityReplacementMap)
131                    : new MXParser();
132
133            parser.setInput(src);
134
135            // allow parser initialization, e.g. for additional entities in XHTML
136            // Note: do it after input is set, otherwise values are reset
137            initXmlParser(parser);
138
139            parseXml(parser, getWrappedSink(sink), reference);
140        } catch (XmlPullParserException ex) {
141            throw new ParseException("Error parsing the model", ex, ex.getLineNumber(), ex.getColumnNumber());
142        } catch (MacroExecutionException ex) {
143            throw new ParseException("Macro execution failed", ex);
144        }
145
146        setSecondParsing(false);
147        init();
148    }
149
150    /**
151     * Initializes the parser with custom entities or other options.
152     *
153     * @param parser A parser, not null.
154     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
155     */
156    protected void initXmlParser(XmlPullParser parser) throws XmlPullParserException {
157        // nop
158    }
159
160    /** {@inheritDoc} */
161    @Override
162    public final int getType() {
163        return XML_TYPE;
164    }
165
166    /**
167     * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
168     *
169     * @param parser A parser, not null.
170     * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
171     * @since 1.1
172     */
173    protected SinkEventAttributeSet getAttributesFromParser(XmlPullParser parser) {
174        int count = parser.getAttributeCount();
175
176        if (count < 0) {
177            return null;
178        }
179
180        SinkEventAttributeSet atts = new SinkEventAttributeSet(count);
181
182        for (int i = 0; i < count; i++) {
183            atts.addAttribute(parser.getAttributeName(i), parser.getAttributeValue(i));
184        }
185
186        return atts;
187    }
188
189    private static final class XmlPullParserLocator extends AbstractLocator {
190
191        private final XmlPullParser parser;
192
193        XmlPullParserLocator(XmlPullParser parser, String reference) {
194            super(reference);
195            this.parser = parser;
196        }
197
198        @Override
199        public int getLineNumber() {
200            return parser.getLineNumber();
201        }
202
203        @Override
204        public int getColumnNumber() {
205            return parser.getColumnNumber() != -1 ? parser.getColumnNumber() + 1 : -1;
206        }
207    }
208    /**
209     * Parse the model from the XmlPullParser into the given sink.
210     *
211     * @param parser A parser, not null.
212     * @param sink the sink to receive the events.
213     * @param reference the reference (usually the file path of the parsed document)
214     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
215     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
216     */
217    private void parseXml(XmlPullParser parser, Sink sink, String reference)
218            throws XmlPullParserException, MacroExecutionException {
219        sink.setDocumentLocator(new XmlPullParserLocator(parser, reference));
220        int eventType = parser.getEventType();
221
222        while (eventType != XmlPullParser.END_DOCUMENT) {
223            if (eventType == XmlPullParser.START_TAG) {
224                handleStartTag(parser, sink);
225            } else if (eventType == XmlPullParser.END_TAG) {
226                handleEndTag(parser, sink);
227            } else if (eventType == XmlPullParser.TEXT) {
228                String text = getText(parser);
229
230                if (isIgnorableWhitespace()) {
231                    if (text.trim().length() != 0) {
232                        handleText(parser, sink);
233                    }
234                } else {
235                    handleText(parser, sink);
236                }
237            } else if (eventType == XmlPullParser.CDSECT) {
238                handleCdsect(parser, sink);
239            } else if (eventType == XmlPullParser.COMMENT) {
240                handleComment(parser, sink);
241            } else if (eventType == XmlPullParser.ENTITY_REF) {
242                handleEntity(parser, sink);
243            } else if (eventType == XmlPullParser.IGNORABLE_WHITESPACE) {
244                // nop
245            } else if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) {
246                // nop
247            } else if (eventType == XmlPullParser.DOCDECL) {
248                addLocalEntities(parser, parser.getText());
249
250                for (byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values()) {
251                    addDTDEntities(parser, new String(res));
252                }
253            }
254
255            try {
256                eventType = parser.nextToken();
257            } catch (IOException io) {
258                // Does not have a cause arg
259                throw new XmlPullParserException("Failed to parse next token", parser, io);
260            }
261        }
262    }
263
    /**
     * Goes through the possible start tags.
     *
     * <p>Invoked by the parsing loop each time the parser reports a start tag event; the element
     * to handle is the one the parser is currently positioned on.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag(XmlPullParser parser, Sink sink)
            throws XmlPullParserException, MacroExecutionException;
274
    /**
     * Goes through the possible end tags.
     *
     * <p>Invoked by the parsing loop each time the parser reports an end tag event; the element
     * to handle is the one the parser is currently positioned on.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag(XmlPullParser parser, Sink sink)
            throws XmlPullParserException, MacroExecutionException;
285
286    /**
287     * Handles text events.
288     *
289     * <p>This is a default implementation, if the parser points to a non-empty text element,
290     * it is emitted as a text event into the specified sink.</p>
291     *
292     * @param parser A parser, not null.
293     * @param sink the sink to receive the events. Not null.
294     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
295     */
296    protected void handleText(XmlPullParser parser, Sink sink) throws XmlPullParserException {
297        String text = getText(parser);
298
299        /*
300         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
301         * parser so any whitespace that makes it here is significant.
302         */
303        if (text != null && !text.isEmpty()) {
304            sink.text(text);
305        }
306    }
307
308    /**
309     * Handles CDATA sections.
310     *
311     * <p>This is a default implementation, all data are emitted as text
312     * events into the specified sink.</p>
313     *
314     * @param parser A parser, not null.
315     * @param sink the sink to receive the events. Not null.
316     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
317     */
318    protected void handleCdsect(XmlPullParser parser, Sink sink) throws XmlPullParserException {
319        sink.text(getText(parser));
320    }
321
322    /**
323     * Handles comments.
324     *
325     * <p>This is a default implementation, all data are emitted as comment
326     * events into the specified sink.</p>
327     *
328     * @param parser A parser, not null.
329     * @param sink the sink to receive the events. Not null.
330     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
331     */
332    protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
333        if (isEmitComments()) {
334            sink.comment(getText(parser));
335        }
336    }
337
338    /**
339     * Handles entities.
340     *
341     * <p>This is a default implementation, all entities are resolved and emitted as text
342     * events into the specified sink, except:</p>
343     * <ul>
344     * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
345     * are emitted as <code>nonBreakingSpace()</code> events.</li>
346     * </ul>
347     *
348     * @param parser A parser, not null.
349     * @param sink the sink to receive the events. Not null.
350     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
351     */
352    protected void handleEntity(XmlPullParser parser, Sink sink) throws XmlPullParserException {
353        String text = getText(parser);
354
355        String name = parser.getName();
356
357        if ("#160".equals(name) || "nbsp".equals(name) || "#x00A0".equals(name)) {
358            sink.nonBreakingSpace();
359        } else {
360            String unescaped = HtmlTools.unescapeHTML(text);
361
362            sink.text(unescaped);
363        }
364    }
365
366    /**
367     * Handles an unknown event.
368     *
369     * <p>This is a default implementation, all events are emitted as unknown
370     * events into the specified sink.</p>
371     *
372     * @param parser the parser to get the event from.
373     * @param sink the sink to receive the event.
374     * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
375     * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
376     * It will be passed as the first argument of the required parameters to the Sink
377     * {@link
378     * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
379     * method.
380     */
381    protected void handleUnknown(XmlPullParser parser, Sink sink, int type) {
382        SinkEventAttributeSet attribs = getAttributesFromParser(parser);
383
384        handleUnknown(parser.getName(), attribs, sink, type);
385    }
386
387    protected void handleUnknown(String elementName, SinkEventAttributeSet attribs, Sink sink, int type) {
388        Object[] required = new Object[] {type};
389        sink.unknown(elementName, required, attribs);
390    }
391
392    /**
393     * <p>isIgnorableWhitespace.</p>
394     *
395     * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
396     * @see #setIgnorableWhitespace(boolean)
397     * @since 1.1
398     */
399    protected boolean isIgnorableWhitespace() {
400        return ignorableWhitespace;
401    }
402
403    /**
404     * Specify that whitespace will be ignored. I.e.:
405     * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
406     * is equivalent to
407     * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
408     *
409     * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
410     * @since 1.1
411     */
412    protected void setIgnorableWhitespace(boolean ignorable) {
413        this.ignorableWhitespace = ignorable;
414    }
415
416    /**
417     * <p>isCollapsibleWhitespace.</p>
418     *
419     * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
420     * @see #setCollapsibleWhitespace(boolean)
421     * @since 1.1
422     */
423    protected boolean isCollapsibleWhitespace() {
424        return collapsibleWhitespace;
425    }
426
427    /**
428     * Specify that text will be collapsed. I.e.:
429     * <pre>Text   Text</pre>
430     * is equivalent to
431     * <pre>Text Text</pre>
432     *
433     * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
434     * @since 1.1
435     */
436    protected void setCollapsibleWhitespace(boolean collapsible) {
437        this.collapsibleWhitespace = collapsible;
438    }
439
440    /**
441     * <p>isTrimmableWhitespace.</p>
442     *
443     * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
444     * @see #setTrimmableWhitespace(boolean)
445     * @since 1.1
446     */
447    protected boolean isTrimmableWhitespace() {
448        return trimmableWhitespace;
449    }
450
451    /**
452     * Specify that text will be collapsed. I.e.:
453     * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
454     * is equivalent to
455     * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
456     *
457     * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
458     * @since 1.1
459     */
460    protected void setTrimmableWhitespace(boolean trimmable) {
461        this.trimmableWhitespace = trimmable;
462    }
463
464    /**
465     * <p>getText.</p>
466     *
467     * @param parser A parser, not null.
468     * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
469     * @see XmlPullParser#getText()
470     * @see #isCollapsibleWhitespace()
471     * @see #isTrimmableWhitespace()
472     * @since 1.1
473     */
474    protected String getText(XmlPullParser parser) {
475        String text = parser.getText();
476
477        if (isTrimmableWhitespace()) {
478            text = text.trim();
479        }
480
481        if (isCollapsibleWhitespace()) {
482            StringBuilder newText = new StringBuilder();
483            String[] elts = StringUtils.split(text, " \r\n");
484            for (int i = 0; i < elts.length; i++) {
485                newText.append(elts[i]);
486                if ((i + 1) < elts.length) {
487                    newText.append(" ");
488                }
489            }
490            text = newText.toString();
491        }
492
493        return text;
494    }
495
496    /**
497     * Return the defined entities in a local doctype. I.e.:
498     * <pre>
499     * &lt;!DOCTYPE foo [
500     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
501     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
502     * ]&gt;
503     * </pre>
504     *
505     * @return a map of the defined entities in a local doctype.
506     * @since 1.1
507     */
508    protected Map<String, String> getLocalEntities() {
509        if (entities == null) {
510            entities = new LinkedHashMap<>();
511        }
512
513        return entities;
514    }
515
516    /**
517     * <p>isValidate.</p>
518     *
519     * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
520     * @since 1.1
521     */
522    public boolean isValidate() {
523        return validate;
524    }
525
526    /**
527     * Specify a flag to validate or not the XML content.
528     *
529     * @param validate the validate to set
530     * @see #parse(Reader, Sink)
531     * @since 1.1
532     */
533    public void setValidate(boolean validate) {
534        this.validate = validate;
535    }
536
537    /**
538     * @since 2.0.0-M4
539     */
540    public boolean getAddDefaultEntities() {
541        return addDefaultEntities;
542    }
543
544    /**
545     * @since 2.0.0-M4
546     */
547    public void setAddDefaultEntities(boolean addDefaultEntities) {
548        this.addDefaultEntities = addDefaultEntities;
549    }
550
551    // ----------------------------------------------------------------------
552    // Private methods
553    // ----------------------------------------------------------------------
554
555    /**
556     * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
557     * <br>
558     * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
559     *
560     * @param parser not null
561     * @param entityName not null
562     * @param entityValue not null
563     * @throws XmlPullParserException if any
564     * @see XmlPullParser#defineEntityReplacementText(String, String)
565     */
566    private void addEntity(XmlPullParser parser, String entityName, String entityValue) throws XmlPullParserException {
567        if (entityName.endsWith("amp")
568                || entityName.endsWith("lt")
569                || entityName.endsWith("gt")
570                || entityName.endsWith("quot")
571                || entityName.endsWith("apos")) {
572            return;
573        }
574
575        parser.defineEntityReplacementText(entityName, entityValue);
576        getLocalEntities().put(entityName, entityValue);
577    }
578
579    /**
580     * Handle entities defined in a local doctype as the following:
581     * <pre>
582     * &lt;!DOCTYPE foo [
583     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
584     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
585     * ]&gt;
586     * </pre>
587     *
588     * @param parser not null
589     * @param text not null
590     * @throws XmlPullParserException if any
591     */
592    private void addLocalEntities(XmlPullParser parser, String text) throws XmlPullParserException {
593        int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
594        if (entitiesCount > 0) {
595            // text should be foo [...]
596            int start = text.indexOf('[');
597            int end = text.lastIndexOf(']');
598            if (start != -1 && end != -1) {
599                addDTDEntities(parser, text.substring(start + 1, end));
600            }
601        }
602    }
603
604    /**
605     * Handle entities defined in external doctypes as the following:
606     * <pre>
607     * &lt;!DOCTYPE foo [
608     *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
609     *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
610     *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
611     *   %HTMLlat1;
612     * ]&gt;
613     * </pre>
614     *
615     * @param parser not null
616     * @param text not null
617     * @throws XmlPullParserException if any
618     */
619    private void addDTDEntities(XmlPullParser parser, String text) throws XmlPullParserException {
620        int entitiesCount = StringUtils.countMatches(text, ENTITY_START);
621        if (entitiesCount > 0) {
622            final String txt = StringUtils.replace(text, ENTITY_START, "\n" + ENTITY_START);
623            try (BufferedReader reader = new BufferedReader(new StringReader(txt))) {
624                String line;
625                String tmpLine = "";
626                Matcher matcher;
627                while ((line = reader.readLine()) != null) {
628                    tmpLine += "\n" + line;
629                    matcher = PATTERN_ENTITY_1.matcher(tmpLine);
630                    if (matcher.find() && matcher.groupCount() == 7) {
631                        String entityName = matcher.group(2);
632                        String entityValue = matcher.group(5);
633
634                        addEntity(parser, entityName, entityValue);
635                        tmpLine = "";
636                    } else {
637                        matcher = PATTERN_ENTITY_2.matcher(tmpLine);
638                        if (matcher.find() && matcher.groupCount() == 8) {
639                            String entityName = matcher.group(2);
640                            String entityValue = matcher.group(5);
641
642                            addEntity(parser, entityName, entityValue);
643                            tmpLine = "";
644                        }
645                    }
646                }
647            } catch (IOException e) {
648                // nop
649            }
650        }
651    }
652
653    /**
654     * Implementation of the callback mechanism <code>EntityResolver</code>.
655     * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
656     */
657    public static class CachedFileEntityResolver implements EntityResolver {
658        private static final Logger LOGGER = LoggerFactory.getLogger(CachedFileEntityResolver.class);
659
660        /** Map with systemId as key and the content of systemId as byte[]. */
661        protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
662
663        private static final Map<String, String> WELL_KNOWN_SYSTEM_IDS = new HashMap<>();
664
665        static {
666            WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/2001/xml.xsd", "xml.xsd");
667            WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/2001/xml.xsd", "xml.xsd");
668            WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
669            WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/xdoc-2.0.xsd", "xdoc-2.0.xsd");
670            WELL_KNOWN_SYSTEM_IDS.put("http://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
671            WELL_KNOWN_SYSTEM_IDS.put("https://maven.apache.org/xsd/fml-1.0.1.xsd", "fml-1.0.1.xsd");
672            WELL_KNOWN_SYSTEM_IDS.put("http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
673            WELL_KNOWN_SYSTEM_IDS.put("https://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent", "xhtml-lat1.ent");
674        }
675
676        /** {@inheritDoc} */
677        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
678            byte[] res = ENTITY_CACHE.get(systemId);
679            // already cached?
680            if (res == null) {
681                if (WELL_KNOWN_SYSTEM_IDS.containsKey(systemId)) {
682                    String resource = "/" + WELL_KNOWN_SYSTEM_IDS.get(systemId);
683                    URL url = getClass().getResource(resource);
684                    if (url != null) {
685                        LOGGER.debug(
686                                "Resolving SYSTEM '{}' from well-known classpath resource '{}'", systemId, resource);
687                        res = toByteArray(url);
688                    }
689                }
690
691                if (res == null) {
692                    URI uri = URI.create(systemId);
693                    if (uri.getScheme() == null) {
694                        uri = Paths.get(systemId).toUri();
695                    }
696
697                    LOGGER.debug("Resolving SYSTEM '{}' from URI resource '{}'", systemId, uri);
698                    res = toByteArray(uri.toURL());
699                }
700
701                ENTITY_CACHE.put(systemId, res);
702            } else {
703                LOGGER.debug("Resolved SYSTEM '{}' from cache", systemId);
704            }
705
706            InputSource is = new InputSource(new ByteArrayInputStream(res));
707            is.setPublicId(publicId);
708            is.setSystemId(systemId);
709
710            return is;
711        }
712
713        /**
714         * @param url not null
715         * @return return an array of byte
716         * @throws SAXException if any
717         */
718        private static byte[] toByteArray(URL url) throws SAXException {
719            try (InputStream is = url.openStream()) {
720                if (is == null) {
721                    throw new SAXException("Cannot open stream from the url: " + url);
722                }
723                return IOUtils.toByteArray(is);
724            } catch (IOException e) {
725                throw new SAXException(e);
726            }
727        }
728    }
729}