View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.doxia.module.xhtml5;
20  
21  import javax.inject.Named;
22  import javax.inject.Singleton;
23  import javax.swing.text.html.HTML.Attribute;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  import java.io.StringReader;
28  import java.io.StringWriter;
29  import java.util.HashMap;
30  import java.util.Map;
31  
32  import org.apache.commons.io.IOUtils;
33  import org.apache.commons.lang3.StringUtils;
34  import org.apache.maven.doxia.macro.MacroExecutionException;
35  import org.apache.maven.doxia.macro.MacroRequest;
36  import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
37  import org.apache.maven.doxia.parser.ParseException;
38  import org.apache.maven.doxia.parser.Xhtml5BaseParser;
39  import org.apache.maven.doxia.sink.Sink;
40  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
41  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
42  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
43  import org.slf4j.Logger;
44  import org.slf4j.LoggerFactory;
45  
46  /**
47   * Parse an xhtml5 model and emit events into a Doxia Sink.
48   */
49  @Singleton
50  @Named("xhtml")
51  public class Xhtml5Parser extends Xhtml5BaseParser implements Xhtml5Markup {
52      private static final Logger LOGGER = LoggerFactory.getLogger(Xhtml5Parser.class);
53  
54      /** For verbatim source. */
55      protected boolean source;
56  
57      /** Empty elements don't write a closing tag. */
58      private boolean isEmptyElement;
59  
60      /**
61       * The source content of the input reader. Used to pass into macros.
62       */
63      private String sourceContent;
64  
65      /** {@inheritDoc} */
66      protected void handleStartTag(XmlPullParser parser, Sink sink)
67              throws XmlPullParserException, MacroExecutionException {
68          isEmptyElement = parser.isEmptyElementTag();
69  
70          SinkEventAttributeSet attribs = getAttributesFromParser(parser);
71  
72          if (parser.getName().equals(HTML.toString())) {
73              // Do nothing
74              return;
75          } else if (parser.getName().equals(HEAD.toString())) {
76              sink.head(attribs);
77          } else if (parser.getName().equals(TITLE.toString())) {
78              sink.title(attribs);
79          } else if (parser.getName().equals(META.toString())) {
80              String name = parser.getAttributeValue(null, Attribute.NAME.toString());
81              String content = parser.getAttributeValue(null, Attribute.CONTENT.toString());
82  
83              if ("author".equals(name)) {
84                  sink.author(null);
85  
86                  sink.text(content);
87  
88                  sink.author_();
89              } else if ("date".equals(name)) {
90                  sink.date(null);
91  
92                  sink.text(content);
93  
94                  sink.date_();
95              } else {
96                  sink.unknown("meta", new Object[] {TAG_TYPE_SIMPLE}, attribs);
97              }
98          }
99          /*
100          * The ADDRESS element may be used by authors to supply contact information
101          * for a model or a major part of a model such as a form. This element
102          *  often appears at the beginning or end of a model.
103          */
104         else if (parser.getName().equals(ADDRESS.toString())) {
105             sink.address(attribs);
106         } else if (parser.getName().equals(BODY.toString())) {
107             sink.body(attribs);
108         } else if (parser.getName().equals(DIV.toString())) {
109             String divClass = parser.getAttributeValue(null, Attribute.CLASS.toString());
110 
111             if ("verbatim source".equals(divClass)) {
112                 this.source = true;
113             }
114 
115             baseStartTag(parser, sink); // pick up other divs
116         }
117         /*
118          * The PRE element tells visual user agents that the enclosed text is
119          * "preformatted". When handling preformatted text, visual user agents:
120          * - May leave white space intact.
121          * - May render text with a fixed-pitch font.
122          * - May disable automatic word wrap.
123          * - Must not disable bidirectional processing.
124          * Non-visual user agents are not required to respect extra white space
125          * in the content of a PRE element.
126          */
127         else if (parser.getName().equals(PRE.toString())) {
128             if (source) {
129                 attribs.addAttributes(SinkEventAttributeSet.SOURCE);
130             }
131 
132             verbatim();
133 
134             sink.verbatim(attribs);
135         } else if (!baseStartTag(parser, sink)) {
136             if (isEmptyElement) {
137                 handleUnknown(parser, sink, TAG_TYPE_SIMPLE);
138             } else {
139                 handleUnknown(parser, sink, TAG_TYPE_START);
140             }
141 
142             LOGGER.warn(
143                     "Unrecognized xhtml5 tag <{}> at [{}:{}]",
144                     parser.getName(),
145                     parser.getLineNumber(),
146                     parser.getColumnNumber());
147         }
148     }
149 
150     /** {@inheritDoc} */
151     protected void handleEndTag(XmlPullParser parser, Sink sink)
152             throws XmlPullParserException, MacroExecutionException {
153         if (parser.getName().equals(HTML.toString())) {
154             // Do nothing
155             return;
156         } else if (parser.getName().equals(HEAD.toString())) {
157             sink.head_();
158         } else if (parser.getName().equals(TITLE.toString())) {
159             sink.title_();
160         } else if (parser.getName().equals(BODY.toString())) {
161             emitHeadingSections(0, sink, false);
162 
163             sink.body_();
164         } else if (parser.getName().equals(ADDRESS.toString())) {
165             sink.address_();
166         } else if (parser.getName().equals(DIV.toString())) {
167             this.source = false;
168             baseEndTag(parser, sink);
169         } else if (!baseEndTag(parser, sink)) {
170             if (!isEmptyElement) {
171                 handleUnknown(parser, sink, TAG_TYPE_END);
172             }
173         }
174 
175         isEmptyElement = false;
176     }
177 
178     /** {@inheritDoc} */
179     @Override
180     protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
181         String text = getText(parser).trim();
182 
183         if (text.startsWith("MACRO") && !isSecondParsing()) {
184             processMacro(text, sink);
185         } else {
186             super.handleComment(parser, sink);
187         }
188     }
189 
190     /** process macro embedded in XHTML commment */
191     private void processMacro(String text, Sink sink) throws XmlPullParserException {
192         String s = text.substring(text.indexOf('{') + 1, text.indexOf('}'));
193         s = escapeForMacro(s);
194         String[] params = StringUtils.split(s, "|");
195         String macroName = params[0];
196 
197         Map<String, Object> parameters = new HashMap<>();
198         for (int i = 1; i < params.length; i++) {
199             String[] param = StringUtils.split(params[i], "=");
200             if (param.length == 1) {
201                 throw new XmlPullParserException("Missing 'key=value' pair for macro parameter: " + params[i]);
202             }
203 
204             String key = unescapeForMacro(param[0]);
205             String value = unescapeForMacro(param[1]);
206             parameters.put(key, value);
207         }
208 
209         MacroRequest request = new MacroRequest(sourceContent, new Xhtml5Parser(), parameters, getBasedir());
210 
211         try {
212             executeMacro(macroName, request, sink);
213         } catch (MacroExecutionException e) {
214             throw new XmlPullParserException("Unable to execute macro in the document: " + macroName);
215         } catch (MacroNotFoundException me) {
216             throw new XmlPullParserException("Macro not found: " + macroName);
217         }
218     }
219 
220     /**
221      * escapeForMacro
222      *
223      * @param s String
224      * @return String
225      */
226     private String escapeForMacro(String s) {
227         if (s == null || s.length() < 1) {
228             return s;
229         }
230 
231         String result = s;
232 
233         // use some outrageously out-of-place chars for text
234         // (these are device control one/two in unicode)
235         result = StringUtils.replace(result, "\\=", "\u0011");
236         result = StringUtils.replace(result, "\\|", "\u0012");
237 
238         return result;
239     }
240 
241     /**
242      * unescapeForMacro
243      *
244      * @param s String
245      * @return String
246      */
247     private String unescapeForMacro(String s) {
248         if (s == null || s.length() < 1) {
249             return s;
250         }
251 
252         String result = s;
253 
254         result = StringUtils.replace(result, "\u0011", "=");
255         result = StringUtils.replace(result, "\u0012", "|");
256 
257         return result;
258     }
259 
260     /**
261      * {@inheritDoc}
262      */
263     protected void init() {
264         super.init();
265 
266         this.source = false;
267         this.isEmptyElement = false;
268     }
269 
270     /** {@inheritDoc} */
271     public void parse(Reader source, Sink sink, String reference) throws ParseException {
272         this.sourceContent = null;
273 
274         try (Reader reader = source) {
275             StringWriter contentWriter = new StringWriter();
276             IOUtils.copy(reader, contentWriter);
277             sourceContent = contentWriter.toString();
278         } catch (IOException ex) {
279             throw new ParseException("Error reading the input source", ex);
280         }
281 
282         try {
283             super.parse(new StringReader(sourceContent), sink, reference);
284         } finally {
285             this.sourceContent = null;
286         }
287     }
288 }