001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.doxia.module.xhtml5;
020
021import javax.inject.Named;
022import javax.inject.Singleton;
023import javax.swing.text.html.HTML.Attribute;
024
025import java.io.IOException;
026import java.io.Reader;
027import java.io.StringReader;
028import java.io.StringWriter;
029import java.util.HashMap;
030import java.util.Map;
031
032import org.apache.commons.io.IOUtils;
033import org.apache.commons.lang3.StringUtils;
034import org.apache.maven.doxia.macro.MacroExecutionException;
035import org.apache.maven.doxia.macro.MacroRequest;
036import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
037import org.apache.maven.doxia.parser.ParseException;
038import org.apache.maven.doxia.parser.Xhtml5BaseParser;
039import org.apache.maven.doxia.sink.Sink;
040import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
041import org.codehaus.plexus.util.xml.pull.XmlPullParser;
042import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046/**
047 * Parse an xhtml5 model and emit events into a Doxia Sink.
048 */
049@Singleton
050@Named("xhtml")
051public class Xhtml5Parser extends Xhtml5BaseParser implements Xhtml5Markup {
052    private static final Logger LOGGER = LoggerFactory.getLogger(Xhtml5Parser.class);
053
054    /** For verbatim source. */
055    protected boolean source;
056
057    /** Empty elements don't write a closing tag. */
058    private boolean isEmptyElement;
059
060    /**
061     * The source content of the input reader. Used to pass into macros.
062     */
063    private String sourceContent;
064
065    /** {@inheritDoc} */
066    protected void handleStartTag(XmlPullParser parser, Sink sink)
067            throws XmlPullParserException, MacroExecutionException {
068        isEmptyElement = parser.isEmptyElementTag();
069
070        SinkEventAttributeSet attribs = getAttributesFromParser(parser);
071
072        if (parser.getName().equals(HTML.toString())) {
073            // Do nothing
074            return;
075        } else if (parser.getName().equals(HEAD.toString())) {
076            sink.head(attribs);
077        } else if (parser.getName().equals(TITLE.toString())) {
078            sink.title(attribs);
079        } else if (parser.getName().equals(META.toString())) {
080            String name = parser.getAttributeValue(null, Attribute.NAME.toString());
081            String content = parser.getAttributeValue(null, Attribute.CONTENT.toString());
082
083            if ("author".equals(name)) {
084                sink.author(null);
085
086                sink.text(content);
087
088                sink.author_();
089            } else if ("date".equals(name)) {
090                sink.date(null);
091
092                sink.text(content);
093
094                sink.date_();
095            } else {
096                sink.unknown("meta", new Object[] {TAG_TYPE_SIMPLE}, attribs);
097            }
098        }
099        /*
100         * The ADDRESS element may be used by authors to supply contact information
101         * for a model or a major part of a model such as a form. This element
102         *  often appears at the beginning or end of a model.
103         */
104        else if (parser.getName().equals(ADDRESS.toString())) {
105            sink.address(attribs);
106        } else if (parser.getName().equals(BODY.toString())) {
107            sink.body(attribs);
108        } else if (parser.getName().equals(DIV.toString())) {
109            String divClass = parser.getAttributeValue(null, Attribute.CLASS.toString());
110
111            if ("verbatim source".equals(divClass)) {
112                this.source = true;
113            }
114
115            baseStartTag(parser, sink); // pick up other divs
116        }
117        /*
118         * The PRE element tells visual user agents that the enclosed text is
119         * "preformatted". When handling preformatted text, visual user agents:
120         * - May leave white space intact.
121         * - May render text with a fixed-pitch font.
122         * - May disable automatic word wrap.
123         * - Must not disable bidirectional processing.
124         * Non-visual user agents are not required to respect extra white space
125         * in the content of a PRE element.
126         */
127        else if (parser.getName().equals(PRE.toString())) {
128            if (source) {
129                attribs.addAttributes(SinkEventAttributeSet.SOURCE);
130            }
131
132            verbatim();
133
134            sink.verbatim(attribs);
135        } else if (!baseStartTag(parser, sink)) {
136            if (isEmptyElement) {
137                handleUnknown(parser, sink, TAG_TYPE_SIMPLE);
138            } else {
139                handleUnknown(parser, sink, TAG_TYPE_START);
140            }
141
142            LOGGER.warn(
143                    "Unrecognized xhtml5 tag <{}> at [{}:{}]",
144                    parser.getName(),
145                    parser.getLineNumber(),
146                    parser.getColumnNumber());
147        }
148    }
149
150    /** {@inheritDoc} */
151    protected void handleEndTag(XmlPullParser parser, Sink sink)
152            throws XmlPullParserException, MacroExecutionException {
153        if (parser.getName().equals(HTML.toString())) {
154            // Do nothing
155            return;
156        } else if (parser.getName().equals(HEAD.toString())) {
157            sink.head_();
158        } else if (parser.getName().equals(TITLE.toString())) {
159            sink.title_();
160        } else if (parser.getName().equals(BODY.toString())) {
161            emitHeadingSections(0, sink, false);
162
163            sink.body_();
164        } else if (parser.getName().equals(ADDRESS.toString())) {
165            sink.address_();
166        } else if (parser.getName().equals(DIV.toString())) {
167            this.source = false;
168            baseEndTag(parser, sink);
169        } else if (!baseEndTag(parser, sink)) {
170            if (!isEmptyElement) {
171                handleUnknown(parser, sink, TAG_TYPE_END);
172            }
173        }
174
175        isEmptyElement = false;
176    }
177
178    /** {@inheritDoc} */
179    @Override
180    protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
181        String text = getText(parser).trim();
182
183        if (text.startsWith("MACRO") && !isSecondParsing()) {
184            processMacro(text, sink);
185        } else {
186            super.handleComment(parser, sink);
187        }
188    }
189
190    /** process macro embedded in XHTML commment */
191    private void processMacro(String text, Sink sink) throws XmlPullParserException {
192        String s = text.substring(text.indexOf('{') + 1, text.indexOf('}'));
193        s = escapeForMacro(s);
194        String[] params = StringUtils.split(s, "|");
195        String macroName = params[0];
196
197        Map<String, Object> parameters = new HashMap<>();
198        for (int i = 1; i < params.length; i++) {
199            String[] param = StringUtils.split(params[i], "=");
200            if (param.length == 1) {
201                throw new XmlPullParserException("Missing 'key=value' pair for macro parameter: " + params[i]);
202            }
203
204            String key = unescapeForMacro(param[0]);
205            String value = unescapeForMacro(param[1]);
206            parameters.put(key, value);
207        }
208
209        MacroRequest request = new MacroRequest(sourceContent, new Xhtml5Parser(), parameters, getBasedir());
210
211        try {
212            executeMacro(macroName, request, sink);
213        } catch (MacroExecutionException e) {
214            throw new XmlPullParserException("Unable to execute macro in the document: " + macroName);
215        } catch (MacroNotFoundException me) {
216            throw new XmlPullParserException("Macro not found: " + macroName);
217        }
218    }
219
220    /**
221     * escapeForMacro
222     *
223     * @param s String
224     * @return String
225     */
226    private String escapeForMacro(String s) {
227        if (s == null || s.length() < 1) {
228            return s;
229        }
230
231        String result = s;
232
233        // use some outrageously out-of-place chars for text
234        // (these are device control one/two in unicode)
235        result = StringUtils.replace(result, "\\=", "\u0011");
236        result = StringUtils.replace(result, "\\|", "\u0012");
237
238        return result;
239    }
240
241    /**
242     * unescapeForMacro
243     *
244     * @param s String
245     * @return String
246     */
247    private String unescapeForMacro(String s) {
248        if (s == null || s.length() < 1) {
249            return s;
250        }
251
252        String result = s;
253
254        result = StringUtils.replace(result, "\u0011", "=");
255        result = StringUtils.replace(result, "\u0012", "|");
256
257        return result;
258    }
259
260    /**
261     * {@inheritDoc}
262     */
263    protected void init() {
264        super.init();
265
266        this.source = false;
267        this.isEmptyElement = false;
268    }
269
270    /** {@inheritDoc} */
271    public void parse(Reader source, Sink sink, String reference) throws ParseException {
272        this.sourceContent = null;
273
274        try (Reader reader = source) {
275            StringWriter contentWriter = new StringWriter();
276            IOUtils.copy(reader, contentWriter);
277            sourceContent = contentWriter.toString();
278        } catch (IOException ex) {
279            throw new ParseException("Error reading the input source", ex);
280        }
281
282        try {
283            super.parse(new StringReader(sourceContent), sink, reference);
284        } finally {
285            this.sourceContent = null;
286        }
287    }
288}