001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.doxia.module.xdoc;
020
021import javax.inject.Named;
022import javax.inject.Singleton;
023import javax.swing.text.html.HTML.Attribute;
024
025import java.io.IOException;
026import java.io.Reader;
027import java.io.StringReader;
028import java.io.StringWriter;
029import java.util.HashMap;
030import java.util.Map;
031
032import org.apache.commons.io.IOUtils;
033import org.apache.maven.doxia.macro.MacroExecutionException;
034import org.apache.maven.doxia.macro.MacroRequest;
035import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
036import org.apache.maven.doxia.parser.ParseException;
037import org.apache.maven.doxia.parser.Xhtml1BaseParser;
038import org.apache.maven.doxia.sink.Sink;
039import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
040import org.apache.maven.doxia.util.HtmlTools;
041import org.codehaus.plexus.util.xml.pull.XmlPullParser;
042import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046/**
047 * Parse an xdoc model and emit events into the specified doxia Sink.
048 *
049 * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
050 * @since 1.0
051 */
052@Singleton
053@Named("xdoc")
054public class XdocParser extends Xhtml1BaseParser implements XdocMarkup {
055    private static final Logger LOGGER = LoggerFactory.getLogger(XdocParser.class);
056
057    /**
058     * The source content of the input reader. Used to pass into macros.
059     */
060    private String sourceContent;
061
062    /**
063     * Empty elements don't write a closing tag.
064     */
065    private boolean isEmptyElement;
066
067    /**
068     * A macro name.
069     */
070    private String macroName;
071
072    /**
073     * The macro parameters.
074     */
075    private Map<String, Object> macroParameters = new HashMap<>();
076
077    /**
078     * Indicates that we're inside &lt;properties&gt; or &lt;head&gt;.
079     */
080    private boolean inHead;
081
082    /**
083     * Indicates that &lt;title&gt; was called from &lt;properties&gt; or &lt;head&gt;.
084     */
085    private boolean hasTitle;
086
087    /** {@inheritDoc} */
088    public void parse(Reader source, Sink sink, String reference) throws ParseException {
089        this.sourceContent = null;
090
091        try (Reader reader = source) {
092            StringWriter contentWriter = new StringWriter();
093            IOUtils.copy(reader, contentWriter);
094            sourceContent = contentWriter.toString();
095        } catch (IOException ex) {
096            throw new ParseException("Error reading the input source", ex);
097        }
098
099        // leave this at default (false) until everything is properly implemented, see DOXIA-226
100        // setIgnorableWhitespace(true);
101
102        try {
103            super.parse(new StringReader(sourceContent), sink, reference);
104        } finally {
105            this.sourceContent = null;
106        }
107    }
108
109    /** {@inheritDoc} */
110    protected void handleStartTag(XmlPullParser parser, Sink sink)
111            throws XmlPullParserException, MacroExecutionException {
112        isEmptyElement = parser.isEmptyElementTag();
113
114        SinkEventAttributeSet attribs = getAttributesFromParser(parser);
115
116        if (parser.getName().equals(DOCUMENT_TAG.toString())) {
117            // Do nothing
118            return;
119        } else if (parser.getName().equals(HEAD.toString())) {
120            if (!inHead) // we might be in head from a <properties> already
121            {
122                this.inHead = true;
123
124                sink.head(attribs);
125            }
126        } else if (parser.getName().equals(TITLE.toString())) {
127            if (hasTitle) {
128                LOGGER.warn("<title> was already defined in <properties>, ignored <title> in <head>.");
129
130                try {
131                    parser.nextText(); // ignore next text event
132                } catch (IOException ex) {
133                    throw new XmlPullParserException("Failed to parse text", parser, ex);
134                }
135            } else {
136                sink.title(attribs);
137            }
138        } else if (parser.getName().equals(AUTHOR_TAG.toString())) {
139            sink.author(attribs);
140        } else if (parser.getName().equals(DATE_TAG.toString())) {
141            sink.date(attribs);
142        } else if (parser.getName().equals(META.toString())) {
143            handleMetaStart(parser, sink, attribs);
144        } else if (parser.getName().equals(BODY.toString())) {
145            if (inHead) {
146                sink.head_();
147                this.inHead = false;
148            }
149
150            sink.body(attribs);
151        } else if (parser.getName().equals(SECTION_TAG.toString())) {
152            handleSectionStart(Sink.SECTION_LEVEL_1, sink, attribs, parser);
153        } else if (parser.getName().equals(SUBSECTION_TAG.toString())) {
154            handleSectionStart(Sink.SECTION_LEVEL_2, sink, attribs, parser);
155        } else if (parser.getName().equals(SOURCE_TAG.toString())) {
156            verbatim();
157
158            attribs.addAttributes(SinkEventAttributeSet.SOURCE);
159
160            sink.verbatim(attribs);
161        } else if (parser.getName().equals(PROPERTIES_TAG.toString())) {
162            if (!inHead) // we might be in head from a <head> already
163            {
164                this.inHead = true;
165
166                sink.head(attribs);
167            }
168        }
169
170        // ----------------------------------------------------------------------
171        // Macro
172        // ----------------------------------------------------------------------
173
174        else if (parser.getName().equals(MACRO_TAG.toString())) {
175            handleMacroStart(parser);
176        } else if (parser.getName().equals(PARAM.toString())) {
177            handleParamStart(parser, sink);
178        } else if (!baseStartTag(parser, sink)) {
179            if (isEmptyElement) {
180                handleUnknown(parser, sink, TAG_TYPE_SIMPLE);
181            } else {
182                handleUnknown(parser, sink, TAG_TYPE_START);
183            }
184
185            LOGGER.warn(
186                    "Unrecognized xdoc tag <{}> at [{}:{}]",
187                    parser.getName(),
188                    parser.getLineNumber(),
189                    parser.getColumnNumber());
190        }
191    }
192
193    /** {@inheritDoc} */
194    protected void handleEndTag(XmlPullParser parser, Sink sink)
195            throws XmlPullParserException, MacroExecutionException {
196        if (parser.getName().equals(DOCUMENT_TAG.toString())) {
197            // Do nothing
198            return;
199        } else if (parser.getName().equals(HEAD.toString())) {
200            // Do nothing, head is closed with BODY start.
201        } else if (parser.getName().equals(BODY.toString())) {
202            consecutiveSections(0, sink);
203
204            sink.body_();
205        } else if (parser.getName().equals(TITLE.toString())) {
206            if (!hasTitle) {
207                sink.title_();
208                this.hasTitle = true;
209            }
210        } else if (parser.getName().equals(AUTHOR_TAG.toString())) {
211            sink.author_();
212        } else if (parser.getName().equals(DATE_TAG.toString())) {
213            sink.date_();
214        } else if (parser.getName().equals(SOURCE_TAG.toString())) {
215            verbatim_();
216
217            sink.verbatim_();
218        } else if (parser.getName().equals(PROPERTIES_TAG.toString())) {
219            // Do nothing, head is closed with BODY start.
220        } else if (parser.getName().equals(MACRO_TAG.toString())) {
221            handleMacroEnd(sink);
222        } else if (parser.getName().equals(PARAM.toString())) {
223            if (!(macroName != null && !macroName.isEmpty())) {
224                handleUnknown(parser, sink, TAG_TYPE_END);
225            }
226        } else if (parser.getName().equals(SECTION_TAG.toString())) {
227            consecutiveSections(0, sink);
228
229            sink.section1_();
230        } else if (parser.getName().equals(SUBSECTION_TAG.toString())) {
231            consecutiveSections(Sink.SECTION_LEVEL_1, sink);
232
233            // sink.section2_() not necessary
234        } else if (!baseEndTag(parser, sink)) {
235            if (!isEmptyElement) {
236                handleUnknown(parser, sink, TAG_TYPE_END);
237            }
238        }
239
240        isEmptyElement = false;
241    }
242
243    protected void consecutiveSections(int newLevel, Sink sink) {
244        closeOpenSections(newLevel, sink);
245        openMissingSections(newLevel, sink);
246
247        setSectionLevel(newLevel);
248    }
249
250    /**
251     * {@inheritDoc}
252     */
253    protected void init() {
254        super.init();
255
256        this.isEmptyElement = false;
257        this.macroName = null;
258        this.macroParameters = null;
259        this.inHead = false;
260        this.hasTitle = false;
261    }
262
263    /**
264     * Close open h2, h3, h4, h5 sections.
265     */
266    private void closeOpenSections(int newLevel, Sink sink) {
267        while (getSectionLevel() >= newLevel) {
268            if (getSectionLevel() > Sink.SECTION_LEVEL_1) {
269                sink.section_(getSectionLevel());
270            }
271
272            setSectionLevel(getSectionLevel() - 1);
273        }
274    }
275
276    private void handleMacroEnd(Sink sink) throws MacroExecutionException {
277        if (!isSecondParsing() && (macroName != null && !macroName.isEmpty())) {
278            MacroRequest request = new MacroRequest(sourceContent, new XdocParser(), macroParameters, getBasedir());
279
280            try {
281                executeMacro(macroName, request, sink);
282            } catch (MacroNotFoundException me) {
283                throw new MacroExecutionException("Macro not found: " + macroName, me);
284            }
285        }
286
287        // Reinit macro
288        macroName = null;
289        macroParameters = null;
290    }
291
292    private void handleMacroStart(XmlPullParser parser) throws MacroExecutionException {
293        if (!isSecondParsing()) {
294            macroName = parser.getAttributeValue(null, Attribute.NAME.toString());
295
296            if (macroParameters == null) {
297                macroParameters = new HashMap<>();
298            }
299
300            if (macroName == null || macroName.isEmpty()) {
301                throw new MacroExecutionException("The '" + Attribute.NAME.toString() + "' attribute for the '"
302                        + MACRO_TAG.toString() + "' tag is required.");
303            }
304        }
305    }
306
307    private void handleMetaStart(XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs) {
308        String name = parser.getAttributeValue(null, Attribute.NAME.toString());
309        String content = parser.getAttributeValue(null, Attribute.CONTENT.toString());
310
311        if ("author".equals(name)) {
312            sink.author(null);
313            sink.text(content);
314            sink.author_();
315        } else if ("date".equals(name)) {
316            sink.date(null);
317            sink.text(content);
318            sink.date_();
319        } else {
320            sink.unknown("meta", new Object[] {TAG_TYPE_SIMPLE}, attribs);
321        }
322    }
323
324    private void handleParamStart(XmlPullParser parser, Sink sink) throws MacroExecutionException {
325        if (!isSecondParsing()) {
326            if (macroName != null && !macroName.isEmpty()) {
327                String paramName = parser.getAttributeValue(null, Attribute.NAME.toString());
328                String paramValue = parser.getAttributeValue(null, Attribute.VALUE.toString());
329
330                if ((paramName == null || paramName.isEmpty()) || (paramValue == null || paramValue.isEmpty())) {
331                    throw new MacroExecutionException(
332                            "'" + Attribute.NAME.toString() + "' and '" + Attribute.VALUE.toString()
333                                    + "' attributes for the '" + PARAM.toString() + "' tag are required inside the '"
334                                    + MACRO_TAG.toString() + "' tag.");
335                }
336
337                macroParameters.put(paramName, paramValue);
338            } else {
339                // param tag from non-macro object, see MSITE-288
340                handleUnknown(parser, sink, TAG_TYPE_START);
341            }
342        }
343    }
344
345    private void handleSectionStart(int level, Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser) {
346        consecutiveSections(level, sink);
347
348        Object id = attribs.getAttribute(Attribute.ID.toString());
349
350        if (id != null) {
351            sink.anchor(id.toString());
352            sink.anchor_();
353        }
354
355        sink.section(level, attribs);
356        sink.sectionTitle(level, null);
357        sink.text(HtmlTools.unescapeHTML(parser.getAttributeValue(null, Attribute.NAME.toString())));
358        sink.sectionTitle_(level);
359    }
360
361    /**
362     * Open missing h2, h3, h4, h5 sections.
363     */
364    private void openMissingSections(int newLevel, Sink sink) {
365        while (getSectionLevel() < newLevel - 1) {
366            setSectionLevel(getSectionLevel() + 1);
367
368            if (getSectionLevel() == Sink.SECTION_LEVEL_5) {
369                sink.section5();
370            } else if (getSectionLevel() == Sink.SECTION_LEVEL_4) {
371                sink.section4();
372            } else if (getSectionLevel() == Sink.SECTION_LEVEL_3) {
373                sink.section3();
374            } else if (getSectionLevel() == Sink.SECTION_LEVEL_2) {
375                sink.section2();
376            }
377        }
378    }
379}