View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.doxia.module.xhtml5;
20  
21  import javax.inject.Named;
22  import javax.inject.Singleton;
23  import javax.swing.text.html.HTML.Attribute;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  import java.io.StringReader;
28  import java.io.StringWriter;
29  import java.util.LinkedHashMap;
30  import java.util.Map;
31  
32  import org.apache.commons.io.IOUtils;
33  import org.apache.maven.doxia.macro.MacroExecutionException;
34  import org.apache.maven.doxia.macro.MacroRequest;
35  import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
36  import org.apache.maven.doxia.parser.ParseException;
37  import org.apache.maven.doxia.parser.Xhtml5BaseParser;
38  import org.apache.maven.doxia.sink.Sink;
39  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
40  import org.apache.maven.doxia.util.DoxiaStringUtils;
41  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
42  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
43  import org.slf4j.Logger;
44  import org.slf4j.LoggerFactory;
45  
46  /**
47   * Parse an xhtml5 model and emit events into a Doxia Sink.
48   */
49  @Singleton
50  @Named("xhtml")
51  public class Xhtml5Parser extends Xhtml5BaseParser implements Xhtml5Markup {
52      private static final Logger LOGGER = LoggerFactory.getLogger(Xhtml5Parser.class);
53  
54      /** For verbatim source. */
55      protected boolean source;
56  
57      /** Empty elements don't write a closing tag. */
58      private boolean isEmptyElement;
59  
60      /**
61       * The source content of the input reader. Used to pass into macros.
62       */
63      private String sourceContent;
64  
65      protected void handleStartTag(XmlPullParser parser, Sink sink)
66              throws XmlPullParserException, MacroExecutionException {
67          isEmptyElement = parser.isEmptyElementTag();
68  
69          SinkEventAttributeSet attribs = getAttributesFromParser(parser);
70  
71          if (parser.getName().equals(HTML.toString())) {
72              // Do nothing
73              return;
74          } else if (parser.getName().equals(HEAD.toString())) {
75              sink.head(attribs);
76          } else if (parser.getName().equals(TITLE.toString())) {
77              sink.title(attribs);
78          } else if (parser.getName().equals(META.toString())) {
79              String name = parser.getAttributeValue(null, Attribute.NAME.toString());
80              String content = parser.getAttributeValue(null, Attribute.CONTENT.toString());
81  
82              if ("author".equals(name)) {
83                  sink.author(null);
84  
85                  sink.text(content);
86  
87                  sink.author_();
88              } else if ("date".equals(name)) {
89                  sink.date(null);
90  
91                  sink.text(content);
92  
93                  sink.date_();
94              } else {
95                  sink.unknown("meta", new Object[] {TAG_TYPE_SIMPLE}, attribs);
96              }
97          }
98          /*
99           * The ADDRESS element may be used by authors to supply contact information
100          * for a model or a major part of a model such as a form. This element
101          *  often appears at the beginning or end of a model.
102          */
103         else if (parser.getName().equals(ADDRESS.toString())) {
104             sink.address(attribs);
105         } else if (parser.getName().equals(BODY.toString())) {
106             sink.body(attribs);
107         } else if (parser.getName().equals(DIV.toString())) {
108             String divClass = parser.getAttributeValue(null, Attribute.CLASS.toString());
109 
110             if ("verbatim source".equals(divClass)) {
111                 this.source = true;
112             }
113 
114             baseStartTag(parser, sink); // pick up other divs
115         }
116         /*
117          * The PRE element tells visual user agents that the enclosed text is
118          * "preformatted". When handling preformatted text, visual user agents:
119          * - May leave white space intact.
120          * - May render text with a fixed-pitch font.
121          * - May disable automatic word wrap.
122          * - Must not disable bidirectional processing.
123          * Non-visual user agents are not required to respect extra white space
124          * in the content of a PRE element.
125          */
126         else if (parser.getName().equals(PRE.toString())) {
127             if (source) {
128                 attribs.addAttributes(SinkEventAttributeSet.SOURCE);
129             }
130 
131             verbatim();
132 
133             sink.verbatim(attribs);
134         } else if (!baseStartTag(parser, sink)) {
135             if (isEmptyElement) {
136                 handleUnknown(parser, sink, TAG_TYPE_SIMPLE);
137             } else {
138                 handleUnknown(parser, sink, TAG_TYPE_START);
139             }
140 
141             LOGGER.warn(
142                     "Unrecognized xhtml5 tag <{}> at [{}:{}]",
143                     parser.getName(),
144                     parser.getLineNumber(),
145                     parser.getColumnNumber());
146         }
147     }
148 
149     protected void handleEndTag(XmlPullParser parser, Sink sink)
150             throws XmlPullParserException, MacroExecutionException {
151         if (parser.getName().equals(HTML.toString())) {
152             // Do nothing
153             return;
154         } else if (parser.getName().equals(HEAD.toString())) {
155             sink.head_();
156         } else if (parser.getName().equals(TITLE.toString())) {
157             sink.title_();
158         } else if (parser.getName().equals(BODY.toString())) {
159             emitHeadingSections(0, sink, false);
160 
161             sink.body_();
162         } else if (parser.getName().equals(ADDRESS.toString())) {
163             sink.address_();
164         } else if (parser.getName().equals(DIV.toString())) {
165             this.source = false;
166             baseEndTag(parser, sink);
167         } else if (!baseEndTag(parser, sink)) {
168             if (!isEmptyElement) {
169                 handleUnknown(parser, sink, TAG_TYPE_END);
170             }
171         }
172 
173         isEmptyElement = false;
174     }
175 
176     @Override
177     protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
178         String text = getText(parser).trim();
179 
180         if (text.startsWith("MACRO") && !isSecondParsing()) {
181             processMacro(text, sink);
182         } else {
183             super.handleComment(parser, sink);
184         }
185     }
186 
187     /** process macro embedded in XHTML commment */
188     private void processMacro(String text, Sink sink) throws XmlPullParserException {
189         String s = text.substring(text.indexOf('{') + 1, text.indexOf('}'));
190         s = escapeForMacro(s);
191         String[] params = DoxiaStringUtils.split(s, "|");
192         String macroName = params[0];
193 
194         Map<String, Object> parameters = new LinkedHashMap<>();
195         for (int i = 1; i < params.length; i++) {
196             String[] param = DoxiaStringUtils.split(params[i], "=");
197             if (param.length == 1) {
198                 throw new XmlPullParserException("Missing 'key=value' pair for macro parameter: " + params[i]);
199             }
200 
201             String key = unescapeForMacro(param[0]);
202             String value = unescapeForMacro(param[1]);
203             parameters.put(key, value);
204         }
205 
206         MacroRequest request = new MacroRequest(sourceContent, new Xhtml5Parser(), parameters, getBasedir());
207 
208         try {
209             executeMacro(macroName, request, sink);
210         } catch (MacroExecutionException e) {
211             throw new XmlPullParserException("Unable to execute macro in the document: " + macroName);
212         } catch (MacroNotFoundException me) {
213             throw new XmlPullParserException("Macro not found: " + macroName);
214         }
215     }
216 
217     /**
218      * escapeForMacro
219      *
220      * @param s String
221      * @return String
222      */
223     private String escapeForMacro(String s) {
224         if (s == null || s.length() < 1) {
225             return s;
226         }
227 
228         String result = s;
229 
230         // use some outrageously out-of-place chars for text
231         // (these are device control one/two in unicode)
232         result = DoxiaStringUtils.replace(result, "\\=", "\u0011");
233         result = DoxiaStringUtils.replace(result, "\\|", "\u0012");
234 
235         return result;
236     }
237 
238     /**
239      * unescapeForMacro
240      *
241      * @param s String
242      * @return String
243      */
244     private String unescapeForMacro(String s) {
245         if (s == null || s.length() < 1) {
246             return s;
247         }
248 
249         String result = s;
250 
251         result = DoxiaStringUtils.replace(result, "\u0011", "=");
252         result = DoxiaStringUtils.replace(result, "\u0012", "|");
253 
254         return result;
255     }
256 
257     /**
258      * {@inheritDoc}
259      */
260     protected void init() {
261         super.init();
262 
263         this.source = false;
264         this.isEmptyElement = false;
265     }
266 
267     public void parse(Reader source, Sink sink, String reference) throws ParseException {
268         this.sourceContent = null;
269 
270         try (Reader reader = source) {
271             StringWriter contentWriter = new StringWriter();
272             IOUtils.copy(reader, contentWriter);
273             sourceContent = contentWriter.toString();
274         } catch (IOException ex) {
275             throw new ParseException("Error reading the input source", ex);
276         }
277 
278         try {
279             super.parse(new StringReader(sourceContent), sink, reference);
280         } finally {
281             this.sourceContent = null;
282         }
283     }
284 }